pax_global_header00006660000000000000000000000064126543646210014523gustar00rootroot0000000000000052 comment=426dfb5b4a941453d8a2e2057aef0b2e94377287 rr-4.1.0/000077500000000000000000000000001265436462100121505ustar00rootroot00000000000000rr-4.1.0/.clang-format000066400000000000000000000025561265436462100145330ustar00rootroot00000000000000# BasedOnStyle: Mozilla AccessModifierOffset: -2 ConstructorInitializerIndentWidth: 4 AlignEscapedNewlinesLeft: false AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: false AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakTemplateDeclarations: false AlwaysBreakBeforeMultilineStrings: false BreakBeforeBinaryOperators: false BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false BinPackParameters: true ColumnLimit: 80 ConstructorInitializerAllOnOneLineOrOnePerLine: true DerivePointerBinding: false ExperimentalAutoDetectBinPacking: false IndentCaseLabels: true MaxEmptyLinesToKeep: 1 NamespaceIndentation: None ObjCSpaceBeforeProtocolList: false PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 60 PenaltyBreakString: 1000 PenaltyBreakFirstLessLess: 120 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 200 PointerBindsToType: true SpacesBeforeTrailingComments: 1 Cpp11BracedListStyle: false Standard: Cpp03 IndentWidth: 2 TabWidth: 8 UseTab: Never BreakBeforeBraces: Attach IndentFunctionDeclarationAfterType: false SpacesInParentheses: false SpacesInAngles: false SpaceInEmptyParentheses: false SpacesInCStyleCastParentheses: false SpaceAfterControlStatementKeyword: true SpaceBeforeAssignmentOperators: true ContinuationIndentWidth: 4 rr-4.1.0/.gitignore000066400000000000000000000003751265436462100141450ustar00rootroot00000000000000*~ .cproject CMakeCache.txt CMakeFiles/ cmake_install.cmake CPackConfig.cmake CPackSourceConfig.cmake _CPack_Packages/ CTestTestfile.cmake Debug dist/ install_manifest.txt Makefile Profile obj/ .project *.log *.pyc *.record *.replay .settings/ Testing/ rr-4.1.0/.travis.yml000066400000000000000000000004541265436462100142640ustar00rootroot00000000000000language: c compiler: gcc # XXX tests are disabled for the moment because the travis VM's CPU # type isn't supported, and because there are odd timeouts with no-op # tests script: ./src/script/setup_travis.sh && ./configure && make && make package notifications: email: - rr-builds@mozilla.org rr-4.1.0/CMakeLists.txt000066400000000000000000000620251265436462100147150ustar00rootroot00000000000000# *-* Mode: cmake; *-* cmake_minimum_required(VERSION 2.8.5) project(rr C CXX ASM) enable_testing() set(BUILD_SHARED_LIBS ON) set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) # CAREFUL! "-" is an invalid character in RPM package names, while # debian is happy with it. However, "_" is illegal in debs, while RPM # is cool with it. Sigh. set(rr_VERSION_MAJOR 4) set(rr_VERSION_MINOR 1) set(rr_VERSION_PATCH 0) add_definitions(-DRR_VERSION="${rr_VERSION_MAJOR}.${rr_VERSION_MINOR}.${rr_VERSION_PATCH}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -O0 -g3 -Wall -Werror -Wstrict-prototypes") # Define __STDC_LIMIT_MACROS so |#include | works as expected. # Define __STDC_FORMAT_MACROS so |#include | works as expected. 
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__USE_LARGEFILE64 -D__STDC_LIMIT_MACROS -D__STDC_FORMAT_MACROS -std=c++0x -pthread -O0 -g3 -Wall -Werror") set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -g3") # Check that a 32-bit cross-compile works. This is needed regardless # of whether the entire build is being built 32-bit. if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") # try_compile won't accept LINK_FLAGS, so do this manually. file(WRITE "${CMAKE_BINARY_DIR}/test32.c" "int main() { return 0; }") execute_process(COMMAND ${CMAKE_CXX_COMPILER} -o ${CMAKE_BINARY_DIR}/test32 ${CMAKE_BINARY_DIR}/test32.c -m32 RESULT_VARIABLE COMPILER_32BIT_RESULT) if(NOT (COMPILER_32BIT_RESULT EQUAL 0)) message(FATAL_ERROR "Your toolchain doesn't support 32-bit cross-compilation.") endif() endif() option(force32bit, "Force a 32-bit build, rather than a 64-bit one") if(force32bit) set(rr_64BIT false) set(rr_MBITNESS_OPTION -m32) else() if (CMAKE_SIZEOF_VOID_P EQUAL 8) set(rr_64BIT true) else() set(rr_64BIT false) endif() set(rr_MBITNESS_OPTION) endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${rr_MBITNESS_OPTION}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${rr_MBITNESS_OPTION}") set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${rr_MBITNESS_OPTION}") find_package(PkgConfig REQUIRED) # If we're cross-compiling a 32-bit build on a 64-bit host we need # to ensure we're looking for the right libraries. # This has been tested on Ubuntu and Fedora. set(LIBDIR32_CANDIDATES /usr/lib/i386-linux-gnu/pkgconfig/ /usr/lib/pkgconfig/ ) if((NOT rr_64BIT) AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") foreach(libdir ${LIBDIR32_CANDIDATES}) if(IS_DIRECTORY ${libdir}) set(ENV{PKG_CONFIG_LIBDIR} ${libdir}) break() endif() endforeach(libdir) if (NOT DEFINED ENV{PKG_CONFIG_LIBDIR}) message(FATAL_ERROR "Couldn't find a suitable 32-bit pkgconfig lib dir. You probably need to install a 32-bit pkgconfig package (pkgconfig.i686 for Fedora or pkg-config:i386 for Ubuntu") endif() endif() # Check for required libraries set(REQUIRED_LIBS zlib ) foreach(required_lib ${REQUIRED_LIBS}) string(TOUPPER ${required_lib} PKG) pkg_check_modules(${PKG} REQUIRED ${required_lib}) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${${PKG}_CFLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${${PKG}_CFLAGS}") endforeach(required_lib) # Check for Python >=2.7 but not Python 3. find_package(PythonInterp 2.7 REQUIRED) if(PYTHON_VERSION_MAJOR GREATER 2) message(FATAL_ERROR "Python 3 is not supported, please use Python 2.7.") endif() # Check for required Python modules set(REQUIRED_PYTHON_MODULES pexpect ) foreach(py_module ${REQUIRED_PYTHON_MODULES}) execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" "import ${py_module}" RESULT_VARIABLE module_status) if(module_status) message(FATAL_ERROR "Couldn't find required Python module ${py_module}.") endif() endforeach(py_module) # Check for gdb execute_process(COMMAND "gdb" "--version" RESULT_VARIABLE module_status) if(module_status) message(FATAL_ERROR "Couldn't find gdb.") endif() set_source_files_properties(src/preload/preload.c PROPERTIES COMPILE_FLAGS -O2) include_directories("${PROJECT_SOURCE_DIR}/include") # We need to know where our generated files are. include_directories("${CMAKE_CURRENT_BINARY_DIR}") add_library(rrpreload src/preload/preload.c src/preload/raw_syscall.S src/preload/syscall_hook.S src/preload/breakpoint_table.S ) # Ensure that CMake knows about our generated files. # # Alphabetical, please. 
set(GENERATED_FILES AssemblyTemplates.generated CheckSyscallNumbers.generated SyscallEnumsX64.generated SyscallEnumsX86.generated SyscallEnumsForTestsX64.generated SyscallEnumsForTestsX86.generated SyscallHelperFunctions.generated SyscallnameArch.generated SyscallRecordCase.generated ) foreach(generated_file ${GENERATED_FILES}) set_source_files_properties(${generated_file} PROPERTIES GENERATED true HEADER_FILE_ONLY true) add_custom_command(OUTPUT ${generated_file} COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/src/generate_syscalls.py" "${CMAKE_CURRENT_BINARY_DIR}/${generated_file}" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/generate_syscalls.py" "${CMAKE_CURRENT_SOURCE_DIR}/src/syscalls.py" "${CMAKE_CURRENT_SOURCE_DIR}/src/assembly_templates.py") endforeach(generated_file) add_custom_target(Generated DEPENDS ${GENERATED_FILES}) add_custom_command(OUTPUT rr_page_64 COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/src/generate_rr_page.py" "${CMAKE_CURRENT_BINARY_DIR}/bin/rr_page_64" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/generate_rr_page.py") add_custom_command(OUTPUT rr_page_32 COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/src/generate_rr_page.py" "${CMAKE_CURRENT_BINARY_DIR}/bin/rr_page_32" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/generate_rr_page.py") add_custom_command(OUTPUT rr_page_64_replay COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/src/generate_rr_page.py" "${CMAKE_CURRENT_BINARY_DIR}/bin/rr_page_64_replay" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/generate_rr_page.py") add_custom_command(OUTPUT rr_page_32_replay COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/src/generate_rr_page.py" "${CMAKE_CURRENT_BINARY_DIR}/bin/rr_page_32_replay" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/generate_rr_page.py") add_custom_target(Pages DEPENDS rr_page_32 rr_page_64 rr_page_32_replay rr_page_64_replay) add_executable(rr src/test/cpuid_loop.S src/AddressSpace.cc src/AutoRemoteSyscalls.cc src/Command.cc src/CompressedReader.cc src/CompressedWriter.cc src/CPUIDBugDetector.cc src/DiversionSession.cc src/DumpCommand.cc src/EmuFs.cc src/Event.cc src/ExtraRegisters.cc src/fast_forward.cc src/FdTable.cc src/Flags.cc src/GdbCommand.cc src/GdbCommandHandler.cc src/GdbConnection.cc src/GdbExpression.cc src/GdbInitCommand.cc src/GdbServer.cc src/HelpCommand.cc src/kernel_abi.cc src/kernel_metadata.cc src/log.cc src/MagicSaveDataMonitor.cc src/main.cc src/Monkeypatcher.cc src/PerfCounters.cc src/PsCommand.cc src/RecordCommand.cc src/RecordSession.cc src/record_signal.cc src/record_syscall.cc src/Registers.cc src/remote_code_ptr.cc src/ReplayCommand.cc src/ReplaySession.cc src/replay_syscall.cc src/ReplayTimeline.cc src/Scheduler.cc src/SeccompFilterRewriter.cc src/Session.cc src/StdioMonitor.cc src/task.cc src/TraceFrame.cc src/TraceStream.cc src/util.cc ) add_dependencies(rr Generated Pages) target_link_libraries(rr -ldl -lrt ${ZLIB_LDFLAGS} ) target_link_libraries(rrpreload -ldl ) add_executable(exec_stub src/exec_stub.c) set_target_properties(exec_stub PROPERTIES LINK_FLAGS -nostdlib) set_source_files_properties(src/exec_stub.c COMPILE_FLAGS "-fno-stack-protector") install(PROGRAMS scripts/signal-rr-recording.sh ${CMAKE_CURRENT_BINARY_DIR}/bin/rr_page_64 ${CMAKE_CURRENT_BINARY_DIR}/bin/rr_page_64_replay ${CMAKE_CURRENT_BINARY_DIR}/bin/rr_page_32 ${CMAKE_CURRENT_BINARY_DIR}/bin/rr_page_32_replay DESTINATION bin) install(TARGETS rr rrpreload exec_stub RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) # Build 32-bit librrpreload on 64-bit builds. 
# We copy the source files into '32' subdirectories in the output # directory, so we can set different compile options on them. # This sucks but I can't find a better way to get CMake to build # the same source file in two different ways. if(rr_64BIT) foreach(file preload_interface.h) set_source_files_properties(32/${file} PROPERTIES GENERATED true HEADER_FILE_ONLY true) add_custom_command(OUTPUT 32/${file} COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/32 && cp "${CMAKE_CURRENT_SOURCE_DIR}/src/preload/${file}" "${CMAKE_CURRENT_BINARY_DIR}/32/${file}" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/preload/${file}") endforeach(file) foreach(file preload.c raw_syscall.S syscall_hook.S breakpoint_table.S) set_source_files_properties(32/${file} PROPERTIES GENERATED true COMPILE_FLAGS "-m32 -O2") add_custom_command(OUTPUT 32/${file} COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/32 && cp "${CMAKE_CURRENT_SOURCE_DIR}/src/preload/${file}" "${CMAKE_CURRENT_BINARY_DIR}/32/${file}" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/preload/${file}" 32/preload_interface.h) endforeach(file) add_library(rrpreload_32 32/preload.c 32/raw_syscall.S 32/syscall_hook.S 32/breakpoint_table.S ) set_target_properties(rrpreload_32 PROPERTIES LINK_FLAGS -m32) target_link_libraries(rrpreload_32 -ldl ) foreach(file exec_stub.c) set_source_files_properties(32/${file} PROPERTIES GENERATED true COMPILE_FLAGS "-m32 -fno-stack-protector") add_custom_command(OUTPUT 32/${file} COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/32 && cp "${CMAKE_CURRENT_SOURCE_DIR}/src/${file}" "${CMAKE_CURRENT_BINARY_DIR}/32/${file}") endforeach(file) add_executable(exec_stub_32 32/exec_stub.c) set_target_properties(exec_stub_32 PROPERTIES LINK_FLAGS "-nostdlib -m32") install(TARGETS rrpreload_32 exec_stub_32 RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) endif() ##-------------------------------------------------- ## Testing # A "basic test" consists of a foo.c source file. All basic tests use the # same basic_test.run driver script. The test name is passed as an additional # parameter to the driver script. This script just does # "compare_test EXIT-SUCCESS", i.e. records and replays the program and verifies # that the output of both runs is identical and contains EXIT-SUCCESS. # # NB: you must update this variable when adding a new test source # file. The list is not generated automatically. # # Alphabetical, please. 
set(BASIC_TESTS 64bit_child _llseek accept alarm alarm2 alsa_ioctl arch_prctl async_segv_ignored async_signal_syscalls2 at_threadexit bad_ip bad_syscall barrier big_buffers block blocked_sigsegv brk brk2 capget chew_cpu chown clock clone clone_bad_stack clone_immediate_exit clone_untraced constructor creat_address_not_truncated desched_blocking_poll dup epoll_create epoll_create1 exec_flags exec_self fadvise fault_in_code_page fcntl_owner_ex fcntl_dupfd fcntl_seals fcntl_sig fd_tracking_across_threads fds_clean flock flock2 fork_brk fork_child_crash fork_stress fxregs getgroups getrandom setitimer getsid gettimeofday grandchild_threads grandchild_threads_main_running grandchild_threads_thread_running grandchild_threads_parent_alive ignored_sigsegv int3 intr_futex_wait_restart intr_poll intr_pselect intr_read_no_restart intr_read_restart intr_sleep intr_sleep_no_restart invalid_fcntl io ioctl legacy_ugid madvise map_fixed memfd_create mincore mknod mlock mmap_discontinuous mmap_private mmap_ro mmap_shared mmap_shared_multiple mmap_shared_subpage mmap_short_file mmap_tmpfs mprotect mprotect_growsdown mprotect_heterogenous mprotect_none mprotect_stack mremap mremap_shrink msg msync multiple_pending_signals multiple_pending_signals_sequential munmap_segv munmap_discontinuous no_mask_timeslice numa old_fork orphan_process pause perf_event personality pthread_rwlocks poll_sig_race prctl prctl_deathsig prctl_name protect_rr_fds prw pthread_condvar_locking ptrace ptrace_attach_null_status ptrace_attach_running ptrace_attach_sleeping ptrace_attach_stopped ptrace_attach_thread_running ptrace_signals ptracer_death ptracer_death_multithread ptracer_death_multithread_peer quotactl rdtsc read_nothing readdir readlink readlinkat readv rlimit robust_futex rusage save_data_fd sched_setaffinity sched_setparam sched_yield sched_yield_to_lower_priority scm_rights seccomp seccomp_null self_sigint sem sendfile set_ptracer set_tid_address setgid setgroups setsid setuid shm sigaction_old sigaltstack sigchld_interrupt_signal sighandler_fork sigill signalfd sigprocmask sigprocmask_in_syscallbuf_sighandler sigprocmask_syscallbuf sigqueueinfo sigreturn sigreturn_reg sigrt sigstop sigstop2 sigsuspend sigtrap simple sioc sock_names_opts splice stack_growth_after_syscallbuf stack_overflow stack_overflow_altstack stack_overflow_with_guard statfs stdout_child stdout_cloexec stdout_dup stdout_redirect strict_priorities switch_read sync syscallbuf_signal_reset syscallbuf_timeslice syscallbuf_timeslice2 sysconf sysctl sysemu_singlestep sysinfo tcgets tgkill thread_stress thread_yield timer timerfd times tiocgwinsz tiocgpgrp truncate tty_ioctls uname unjoined_thread unshare utimes vfork_flush video_capture wait write_race writev xattr zero_length_read ) # A "test with program" consists of a foo.c source file and a foo.run driver # script. See src/test/util.sh to learn how the .run files work. # # NB: you must update this variable when adding a new test source # file. The list is not generated automatically. # # Alphabetical, please. 
set(TESTS_WITH_PROGRAM abort_nonmain args async_kill_with_threads async_kill_with_threads_main_running async_kill_with_threads_thread_running async_segv async_signal_syscalls async_signal_syscalls_siginfo async_usr1 block_intr_sigchld blocked_bad_ip breakpoint breakpoint_conditions breakpoint_overlap call_function checkpoint_dying_threads checkpoint_mixed_mode clone_interruption clone_vfork conditional_breakpoint_calls conditional_breakpoint_offload condvar_stress crash crash_in_function execve_loop exit_group exit_status explicit_checkpoints fork_syscalls function_calls getcwd goto_event hello ignored_async_usr1 immediate_restart interrupt intr_ptrace_decline link madvise_dontfork main_thread_exit mmap_shared_prot mmap_write mutex_pi_stress nanosleep priority read_big_struct restart_abnormal_exit reverse_continue_breakpoint reverse_continue_multiprocess reverse_continue_process_signal reverse_many_breakpoints reverse_step_long reverse_step_threads reverse_step_threads_break search segfault shared_persistent_file signal_numbers stack_growth step_thread string_instructions string_instructions_replay string_instructions_watch syscallbuf_fd_disabling target_fork target_process term_nonmain term_rr threaded_syscall_spam threads tiocinq unexpected_stack_growth user_ignore_sig vfork watchpoint watchpoint_syscall watchpoint_unaligned ) # A "test without program" is a foo.run driver script only, which does # something with one of the test executables above (or has special rules # to build its own executable). # # NB: you must update this variable when adding a new test source # file. The list is not generated automatically. # # Alphabetical, please. set(TESTS_WITHOUT_PROGRAM async_signal_syscalls_100 async_signal_syscalls_1000 bad_breakpoint break_block break_clock break_clone break_exec break_int3 break_mmap_private break_msg break_rdtsc break_sigreturn break_sync_signal break_thread break_time_slice breakpoint_consistent call_exit check_patched_pthread checkpoint_async_signal_syscalls_1000 checkpoint_mmap_shared checkpoint_prctl_name checkpoint_simple cont_signal cpuid dead_thread_target desched_ticks deliver_async_signal_during_syscalls env_newline exec_stop execp explicit_checkpoint_clone final_sigkill first_instruction fork_exec_info_thr get_thread_list hardlink_mmapped_files parent_no_break_child_bkpt parent_no_stop_child_crash read_bad_mem remove_watchpoint restart_invalid_checkpoint restart_unstable restart_diversion reverse_alarm reverse_continue_exec_subprocess reverse_continue_fork_subprocess reverse_continue_start reverse_finish reverse_step_breakpoint reverse_step_signal reverse_step_threads2 reverse_watchpoint reverse_watchpoint_syscall run_end run_in_function sanity shm_checkpoint signal_stop signal_checkpoint simple_script simple_script_debug simple_winch stack_overflow_debug step1 step_rdtsc step_signal string_instructions_break string_instructions_replay_quirk subprocess_exit_ends_session switch_processes syscallbuf_timeslice_250 trace_version term_trace_cpu term_trace_syscall when ) foreach(test ${BASIC_TESTS} ${TESTS_WITH_PROGRAM}) add_executable(${test} src/test/${test}.c) add_dependencies(${test} Generated) target_link_libraries(${test} -lrt) endforeach(test) add_library(test_lib src/test/test_lib.c ) add_dependencies(test_lib Generated) target_link_libraries(constructor -lrt test_lib) # cpuid test needs to link with cpuid_loop.S add_executable(cpuid src/test/cpuid.c src/test/cpuid_loop.S) add_dependencies(cpuid Generated) target_link_libraries(cpuid -lrt) foreach(test 
${BASIC_TESTS} ${OTHER_TESTS}) add_test(${test} bash ${CMAKE_SOURCE_DIR}/src/test/basic_test.run -b ${CMAKE_SOURCE_DIR} ${PROJECT_BINARY_DIR} ${test}) set_tests_properties(${test} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED") add_test(${test}-no-syscallbuf bash ${CMAKE_SOURCE_DIR}/src/test/basic_test.run -n ${CMAKE_SOURCE_DIR} ${PROJECT_BINARY_DIR} ${test}) set_tests_properties(${test}-no-syscallbuf PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED") endforeach(test) foreach(test ${TESTS_WITH_PROGRAM} ${TESTS_WITHOUT_PROGRAM}) add_test(${test} bash ${CMAKE_SOURCE_DIR}/src/test/${test}.run -b ${CMAKE_SOURCE_DIR} ${PROJECT_BINARY_DIR} ${test}) set_tests_properties(${test} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED") add_test(${test}-no-syscallbuf bash ${CMAKE_SOURCE_DIR}/src/test/${test}.run -n ${CMAKE_SOURCE_DIR} ${PROJECT_BINARY_DIR} ${test}) set_tests_properties(${test}-no-syscallbuf PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED") endforeach(test) # Run 32-bit tests on 64-bit builds. # We copy the test files into '32' subdirectories in the output # directory, so we can set different compile options on them. # This sucks but I can't find a better way to get CMake to build # the same source file in two different ways. if(rr_64BIT) set_source_files_properties(32/rrutil.h PROPERTIES GENERATED true HEADER_FILE_ONLY true) add_custom_command(OUTPUT 32/rrutil.h COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/32 && mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/bin/32 && cp -f "${CMAKE_CURRENT_SOURCE_DIR}/src/test/rrutil.h" "${CMAKE_CURRENT_BINARY_DIR}/32/rrutil.h" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/test/rrutil.h") foreach(test ${BASIC_TESTS} ${TESTS_WITH_PROGRAM} cpuid test_lib) set_source_files_properties(32/${test}.c PROPERTIES GENERATED true COMPILE_FLAGS -m32) add_custom_command(OUTPUT 32/${test}.c COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/32 && cp "${CMAKE_CURRENT_SOURCE_DIR}/src/test/${test}.c" "${CMAKE_CURRENT_BINARY_DIR}/32/${test}.c" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/test/${test}.c" 32/rrutil.h) endforeach(test) foreach(file cpuid_loop.S) set_source_files_properties(32/${file} PROPERTIES GENERATED true COMPILE_FLAGS -m32) add_custom_command(OUTPUT 32/${file} COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/32 && cp "${CMAKE_CURRENT_SOURCE_DIR}/src/test/${file}" "${CMAKE_CURRENT_BINARY_DIR}/32/${file}" DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/test/${file}") endforeach(file) foreach(test ${BASIC_TESTS} ${TESTS_WITH_PROGRAM}) add_executable(${test}_32 32/${test}.c) add_dependencies(${test}_32 Generated) set_target_properties(${test}_32 PROPERTIES LINK_FLAGS -m32) target_link_libraries(${test}_32 -lrt) endforeach(test) add_library(test_lib_32 32/test_lib.c ) add_dependencies(test_lib_32 Generated) set_target_properties(test_lib_32 PROPERTIES LINK_FLAGS -m32) target_link_libraries(constructor_32 -lrt test_lib_32) # cpuid test needs to link with cpuid_loop.S add_executable(cpuid_32 32/cpuid.c 32/cpuid_loop.S) add_dependencies(cpuid_32 Generated) set_target_properties(cpuid_32 PROPERTIES LINK_FLAGS -m32) target_link_libraries(cpuid_32 -lrt) foreach(test ${BASIC_TESTS} ${OTHER_TESTS}) add_test(${test}-32 bash ${CMAKE_SOURCE_DIR}/src/test/basic_test.run -b ${CMAKE_SOURCE_DIR} ${PROJECT_BINARY_DIR} ${test}_32) set_tests_properties(${test}-32 PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED") add_test(${test}-32-no-syscallbuf bash ${CMAKE_SOURCE_DIR}/src/test/basic_test.run -n ${CMAKE_SOURCE_DIR} ${PROJECT_BINARY_DIR} ${test}_32) set_tests_properties(${test}-32-no-syscallbuf PROPERTIES FAIL_REGULAR_EXPRESSION 
"FAILED") endforeach(test) foreach(test ${TESTS_WITH_PROGRAM} ${TESTS_WITHOUT_PROGRAM}) add_test(${test}-32 bash ${CMAKE_SOURCE_DIR}/src/test/${test}.run -b ${CMAKE_SOURCE_DIR} ${PROJECT_BINARY_DIR} ${test}_32) set_tests_properties(${test}-32 PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED") add_test(${test}-32-no-syscallbuf bash ${CMAKE_SOURCE_DIR}/src/test/${test}.run -n ${CMAKE_SOURCE_DIR} ${PROJECT_BINARY_DIR} ${test}_32) set_tests_properties(${test}-32-no-syscallbuf PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED") endforeach(test) endif() include(ProcessorCount) ProcessorCount(N) if(NOT N EQUAL 0) set(JFLAG -j${N}) endif() add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --verbose ${JFLAG}) # Run only syscallbuf-enabled and native-bitness tests add_custom_target(fastcheck COMMAND ${CMAKE_CTEST_COMMAND} --verbose --exclude-regex '[-]' ${JFLAG}) ##-------------------------------------------------- ## Package configuration include (InstallRequiredSystemLibraries) set(CPACK_PACKAGE_NAME "rr") set(CPACK_PACKAGE_VERSION_MAJOR "${rr_VERSION_MAJOR}") set(CPACK_PACKAGE_VERSION_MINOR "${rr_VERSION_MINOR}") set(CPACK_PACKAGE_VERSION_PATCH "${rr_VERSION_PATCH}") set(CPACK_SYSTEM_NAME "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_OUTPUT_FILE_PREFIX dist) set(CPACK_GENERATOR "TGZ;RPM;DEB") set(CPACK_SOURCE_GENERATOR "TGZ") set(CPACK_BINARY_DIR "${PROJECT_BINARY_DIR}") set(CPACK_STRIP_FILES TRUE) set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/LICENSE") set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Lightweight tool for recording and replaying execution of applications (trees of processes and threads)") set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_SOURCE_DIR}/README.md") set(CPACK_PACKAGE_VENDOR "Mozilla Foundation") set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Mozilla Foundation") set(CPACK_DEBIAN_PACKAGE_SECTION "devel") if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64") set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64") elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "i.86") set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "i386") elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm.*") set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "arm") endif() # XXX Cmake 2.8.7 doesn't know how to avoid specifiying /usr, # /usr/bin, etc, as files to be installed, but distros are finicky # about their specification. We want to manually filter those paths # out of our install list but 2.8.7 also isn't capable of that. set(CPACK_RPM_USER_BINARY_SPECFILE "${CMAKE_SOURCE_DIR}/rr.spec") set(CPACK_RPM_PACKAGE_RELEASE 1) set(CPACK_RPM_PACKAGE_GROUP "Development/Debuggers") set(CPACK_RPM_PACKAGE_LICENSE "MIT and BSD") include (CPack) ##-------------------------------------------------- ## Misc add_custom_target(setup-travis COMMAND src/script/setup_travis.sh) rr-4.1.0/CONTRIBUTING.md000066400000000000000000000051141265436462100144020ustar00rootroot00000000000000Please make sure you go through this list before submitting a patch. The rules aren't hard and fast, but mostly adhering to them will make for quicker mergings. - [ ] Does your PR add support for a new kernel API? For example, supporting a new syscall. If so, your patch should include at least one new test for the API. This is usually pretty easy. See `$rr/src/test` for examples. - [ ] Did you run the rr test suite (including your new tests, if any), and pass all the tests? `make -C $objdir check`. Unfortunately, rr doesn't have automated infrastructure that can run the tests yet, so developers have to run them locally. - [ ] If you created new files for your PR, did you `git add` them? 
Habitually (or with a script or push hook) checking `git status` is a good habit to acquire. - [ ] If you changed the trace layout or format, did you bump `TRACE_VERSION_NUMBER`? - [ ] If you added new command-line parameters, did you update `print_usage()` to document them? - [ ] Does your PR apply cleanly on top of upstream/master HEAD? It's dangerous to have someone else sort out your merge conflicts, so just don't do it. Best of all is to have a PR *rebased* on top of upstream/master HEAD, so that the merge is simply a fast-forward. - [ ] If your PR includes multiple changesets, do they all (i) build cleanly in sequence; (ii) pass all tests in sequence? This is important for bisecting over commit history. - [ ] If your PR is a very large-scale change (for example, a rewrite in Rust to use the visitor pattern), did you discuss the proposed changes in an issue or the mailing list? It's hard to review large patches that just fall in ones lap. It's much easier to discuss the important changes at a high level and then approach the patch knowing what's important and what's not. - [ ] If your PR is large or includes many changesets, would it have been possible to break the changes into a series of smaller PRs? For example, it's hard to review a big patch that, say, fixes whitespace errors in a file along with a one-line, important, bug fix. It's much easier to review one PR that fixes whitespace (which can just be skimmed), and then review another PR that makes the one-line bug fix (which would be scrutinized more). This approach is also better for the patch author in that it usually allows the work to land faster, and reduces the burden of continually un-bit-rotting large, trivial, changes. - [ ] Did you check your code is formatted correctly? It's easiest to run ```` find src include -name '*.cc' -or -name '*.h' -or -name '*.c'|xargs clang-format -i -style=file ```` on each commit. rr-4.1.0/Dockerfile000066400000000000000000000006271265436462100141470ustar00rootroot00000000000000FROM ubuntu:14.04 MAINTAINER Ted Mielczarek RUN dpkg --add-architecture i386 RUN apt-get update && apt-get install -qq linux-libc-dev linux-libc-dev:i386 gcc-multilib libc6-dev:i386 rpm lib32stdc++6 zlib1g:i386 zlib1g-dev:i386 python-pexpect build-essential gcc g++ gcc-4.8 g++-4.8 cmake pkg-config zlib1g-dev gdb cpp cpp-4.8 RUN ln -s /usr/lib32/libstdc++.so.6 /usr/lib32/libstdc++.so rr-4.1.0/LICENSE000066400000000000000000000051071265436462100131600ustar00rootroot00000000000000Copyright (c) 2013 Mozilla Foundation Copyright 2015 VMware, Inc Copyright 2015 Google Inc. Contributors: Albert Noll , Thomas Anderegg , Nimrod Partush Andrew Walton Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. /* * Copyright 2002 Niels Provos * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ rr-4.1.0/README.md000066400000000000000000000015271265436462100134340ustar00rootroot00000000000000[![Build Status (travis)](https://travis-ci.org/mozilla/rr.svg?branch=master "Travis")](https://travis-ci.org/mozilla/rr) [![Build Status (Jenkins)](http://45.55.219.138:8080/job/rr/badge/icon "Jenkins")](http://45.55.219.138:8080/job/rr/) rr is a lightweight tool for recording and replaying execution of applications (trees of processes and threads). More information about the project, including instructions on how to install, run, and build rr, is at [http://rr-project.org](http://rr-project.org). Or go directly to the [installation and building instructions](https://github.com/mozilla/rr/wiki/Building-And-Installing). Please contribute! Make sure to review the [pull request checklist](/CONTRIBUTING.md) before submitting a pull request. If you find rr useful, please [add a testimonial](https://github.com/mozilla/rr/wiki/Testimonials). rr-4.1.0/configure000077500000000000000000000001201265436462100140500ustar00rootroot00000000000000#!/bin/bash # Helper to make |./configure && make| do what you expect. cmake . rr-4.1.0/doc/000077500000000000000000000000001265436462100127155ustar00rootroot00000000000000rr-4.1.0/doc/res/000077500000000000000000000000001265436462100135065ustar00rootroot00000000000000rr-4.1.0/doc/res/syscall-buffer.svg000066400000000000000000000251101265436462100171470ustar00rootroot00000000000000ptrace traps vs. syscall bufferingptrace trapssyscallsyscallrrtraprrtraprrtraprrtraprrtrapsyscall bufferingsyscallsyscallsyscallsyscallsyscallbufferflushrrbufferbufferbuffersyscallsyscallsyscallrr-4.1.0/doc/rr.html000066400000000000000000001216231265436462100142330ustar00rootroot00000000000000 rr

How rr works

rr records nondeterministic executions and debugs them deterministically.

Practical tool; version 1.2 is latest release. Used to debug Firefox.

Why?

Deterministic debugging: record nondeterministic failure once, debug deterministically forever.

Record intermittent test failures "at scale" online, debug the recordings offline at leisure.

Omniscient debugging: issue queries over program state changes; go backwards in time.

Overview

rr record prog --args
saves recording

rr replay
debugger socket drives replay of most recent recording

Most of an application's execution is deterministic.

rr records the nondeterministic parts.

Examples of nondeterministic inputs

  • clock_gettime(...&now);
  • read(fd, buf, 4096);
  • __asm__("rdtsc")
  • ioctl(...)
  • UNIX signals...

Then during replay, emulate system calls and rdtsc by writing the saved nondeterministic data back to the tracee.
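
For instance, a recorded read() is not re-executed during replay; rr writes the recorded bytes and result back into the tracee. A minimal sketch, assuming the recorded data is already in hand (process_vm_writev() stands in here for rr's remote-memory helpers; error handling omitted):

#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/uio.h>

/* Copy |n| recorded bytes into the tracee's outparam buffer at replay time. */
static void replay_outparam(pid_t tracee, void* tracee_buf,
                            void* recorded, size_t n) {
  struct iovec local = { recorded, n };
  struct iovec remote = { tracee_buf, n };
  process_vm_writev(tracee, &local, 1, &remote, 1, 0);
}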

Shared-memory multitasking is a nondeterministic "input".

... but modern hardware can't record it efficiently. So rr doesn't record truly parallel executions.

Scheduling tasks

Can switch tasks at syscalls. Must preempt straight-line code too, and replay the preemptions deterministically.

Hardware performance counters (HPCs)

Recent chips count instructions-retired, branches-retired, ..., and can be programmed to interrupt after a count of k.

Simulate task preemption with HPC interrupts.

Idea: program the insns-retired counter to interrupt after a count of k. That k approximates a time slice.

Replaying preemption

Record the insn-retired counter value v to the trace file. During replay, program the interrupt for v. Voilà.

UNIX signals are recorded and replayed like task preemptions.

Record counter value v and signum. Replay by interrupting after v and "delivering" signum.

System requirements

Basic requirements

  • Intel chip with Nehalem (2010) or later µarch [1]. VM with perf counter virtualization is OK.
  • x86 userspace. x86-64 kernel is OK.
  • linux with PTRACE_INTERRUPT support: ≥ 3.4
  • (strongly encouraged) linux with seccomp-bpf support: ≥ 3.5
[1] Some Haswell chips don't work with rr (yet).

rr touches low-level details of the machine architecture by necessity, e.g. the kernel syscall ABI.

Supporting more ISAs is "just work". x86-64 coming.

ARM chips don't have the performance counters that rr requires.

So no ARM support is possible at the moment.

Precise HPC events identify points in execution.

Precise replay of signals and preemption requires interrupting tracees at these events.

Performance counters are messier in reality

  • Insns-retired counter is imprecise. Use precise retired-branch counter instead.
  • Counter interrupts can overshoot. Subtract a "skid region".
  • (So replay point is technically indeterminate. But doesn't seem to be a problem in practice, yet.)

seccomp-bpf enables rr to selectively trace syscalls.

Only trap to rr for syscalls that can't be handled in the tracee. Over 100x faster in µbenchmarks.

Buffer syscalls; flush buffer as "super event"

Recorder implementation

Tasks are controlled through the ptrace API.

HPCs are controlled through the perf event API.

The first traced task is forked from rr. After that, clone() and fork() from tracees add new tasks.

And tasks die at exit().

Simplified recorder loop

    while live_task():
        task t = schedule()
        if not status_changed(t):
            resume_execution(t)
        state_change(t)
    
src/recorder.cc

Scheduling a task

    task schedule():
        for each task t, round-robin:
            if is_runnable(t)
               or status_changed(t):
                return t
        tid = waitpid(ANY_CHILD_TASK)
        return task_map[tid]
  
src/recorder_sched.cc

Tasks changing status

    bool status_changed(task t):
        # Non-blocking
        return waitpid(t.tid, WNOHANG)

    # Deceptively simple: includes
    # syscalls, signals, ptrace
    # events ...
  
src/task.cc

Resuming task execution

Invariant: At most one task is running userspace code. All other tasks are either idle or awaiting completion of a syscall.

Multiple running tasks suffer from shared-memory hazards.

rr doesn't attempt to record these hazards, so can't replay them deterministically.

Resuming a task, simplified

    void resume_execution(task t):
        ptrace(PTRACE_SYSCALL, t.tid)
        waitpid(t.tid)  # Blocking

    # Again, deceptively simple: traps
    # for syscalls, signals, ptrace
    # events ...
  
src/{recorder, task}.cc

Most recorder work is done for state_change(task t).

But before looking at it, a few digressions ...

Generating time-slice interrupts

  • perf_event_open() fd for retired-conditional-branches; details are µarch specific
  • Set event "sample period" to k
  • Make event fd O_ASYNC and set tracee task as owner
  • tracee is sent SIGSTKFLT at rbc ≈ k (sketched below)
src/hpc.cc
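
A hedged sketch of the above, with illustrative constants (the real, µarch-specific event setup lives in src/hpc.cc; error handling omitted):

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/perf_event.h>
#include <signal.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Program an interrupt after ~k retired conditional branches in |tracee|. */
static int program_time_slice(pid_t tracee, uint64_t k) {
  struct perf_event_attr attr;
  memset(&attr, 0, sizeof(attr));
  attr.size = sizeof(attr);
  attr.type = PERF_TYPE_RAW;
  attr.config = 0x5101c4;  /* retired-conditional-branches; illustrative */
  attr.sample_period = k;  /* overflow, and thus signal, at rbc ≈ k */
  attr.exclude_kernel = 1;
  int fd = syscall(SYS_perf_event_open, &attr, tracee, -1, -1, 0);
  if (fd < 0) return -1;
  /* Deliver the overflow notification to the tracee as SIGSTKFLT. */
  fcntl(fd, F_SETFL, O_ASYNC);
  fcntl(fd, F_SETOWN, tracee);
  fcntl(fd, F_SETSIG, SIGSTKFLT);
  return fd;
}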

Trapping tracees at rdtsc

  • prctl(PR_SET_TSC, PR_TSC_SIGSEGV) → tracees executing rdtsc trap to SIGSEGV
  • rr examines which instruction triggered SIGSEGV
  • if rdtsc, the value is recorded by the rr tracer and the tracee insn is emulated (sketched below)
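
A minimal sketch of both halves, assuming x86 (error handling omitted):

#include <stdint.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <sys/types.h>

/* Tracee side, once at startup: make rdtsc fault with SIGSEGV. */
static void tracee_trap_on_rdtsc(void) {
  prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);
}

/* Tracer side, on a tracee SIGSEGV: is $ip at a rdtsc (0x0f 0x31)? */
static int at_rdtsc(pid_t tid, uintptr_t ip) {
  long insn = ptrace(PTRACE_PEEKTEXT, tid, (void*)ip, 0);
  return (insn & 0xffff) == 0x310f;  /* little-endian 0x0f, 0x31 */
}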

Tracees generate ptrace events by executing fork, clone, exit, and some other syscalls.

ptrace events exist for linux reasons that aren't interesting.

(rr tracees can share memory mappings with other processes.

Not possible to record efficiently in SW; needs kernel and/or HW support. Unsupported until then.)

Tracee events recorded by state_change()

  • "Pseudo"-signals delivered by implementation of rdtsc or time-slice interrupts
  • Other, "real", signals
  • ptrace events
  • Syscall entry and exit

Some syscalls must be executed atomically; can't switch task until syscall finishes.

Ex: mmap modifies address space, can race other syscalls.

On the other hand, some syscalls require switching; syscall can't finish until the task switches.

Ex: waitpid() only returns after child runs, changes state.

Problem: kernel writes non-atomic syscall outparams in an order that rr can't record.

Kernel outparam writes race tracee userspace code and other syscalls.

Solution: allocate scratch space for the outparams of non-atomic syscalls. At syscall exit, write scratch data back to outparams.

→ rr orders outparam writes
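
A hedged sketch of the entry half for read() on a 32-bit tracee, where arg2 lives in ecx; the exit half copies scratch back to the saved buffer and records the data:

#include <stdint.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>

/* At entry to read(fd, buf, n): make the kernel write to scratch instead. */
static void redirect_to_scratch(pid_t tid, uintptr_t scratch,
                                uintptr_t* saved_buf) {
  struct user_regs_struct regs;
  ptrace(PTRACE_GETREGS, tid, 0, &regs);
  *saved_buf = regs.ecx;  /* arg2 of an int $0x80 read() */
  regs.ecx = scratch;
  ptrace(PTRACE_SETREGS, tid, 0, &regs);
}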

POSIX signals can arrive at practically any point in execution and invoke signal handler code.

→ tracee code can be (almost) arbitrarily re-entered

Linux exits tracees out of syscalls with an ERESTART* error code before delivering signals. The syscall is not always restarted after the signal.

Sighandler nesting gets complex.

When a signal becomes pending

  • consult sighandler table to see if there's a registered sighandler function
  • if so, SINGLESTEP into the sighandler frame and record the struct sigframe set up by kernel. Also record sighandler registers.
  • otherwise, deliver the signal using the ptrace API

Sighandlers exit using the SYS_sigreturn syscall. rr uses these calls to help determine whether interrupted syscalls are restarted.

Tracees exit in unpredictable order at fatal signals like SIGABRT. Naïve waitpid() calls deadlock.

exit_group: same problem.

"Unstable" tracee exit

rr solves this by detaching and not waiting on affected tracees.

Breaks rr scheduling invariant.

Syscall buffer

ptrace traps are expensive. Better to do as much work in tracee process as possible.

Use seccomp-bpf to selectively trap syscalls.

Syscall hooks are LD_PRELOAD'd into tracees.

Hook functions record kernel return value and outparam data to the syscall buffer.

rr monkeypatches __kernel_vsyscall() in vdso to jump to rr trampoline.

Trampoline calls dispatcher, which calls rr hook if available.
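
A minimal sketch of the patch itself, assuming the trampoline is within jmp-rel32 range (the real monkeypatcher also preserves the bytes it overwrites):

#include <stdint.h>
#include <string.h>

/* Overwrite the first 5 bytes of __kernel_vsyscall with jmp <trampoline>. */
static void patch_vsyscall(uint8_t* vsyscall, void* trampoline) {
  int32_t rel = (uint8_t*)trampoline - (vsyscall + 5);
  vsyscall[0] = 0xe9;  /* x86 jmp rel32 */
  memcpy(vsyscall + 1, &rel, sizeof(rel));
}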

Untraced syscalls are recorded to syscallbuf by tracee. Traced events recorded by the rr process "flush" the tracee's syscallbuf.

Lib falls back on traced syscalls.

Simplified example of syscallbuf hook function

static int sys_close(int fd)
{
	long ret;
	if (!start_buffer_syscall(SYS_close))
		/* Fall back on traced syscall.
                 * This generates a ptrace trap. */
		return syscall(SYS_close, fd);

	/* Untraced syscall. Does not generate
         * ptrace trap.*/
	ret = untraced_syscall1(SYS_close, fd);
        /* Save the result to syscall buffer. */
	return commit_syscall(SYS_close, ret);
}
  

How untraced syscalls are made

  • Create single "untraced" kernel entry point
    asm("_untraced_syscall:\n\t"
        "int $0x80");
          
  • Install a seccomp-bpf filter that passes calls from _untraced_syscall and traps to rr otherwise (sketched below)
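
A minimal sketch of such a filter, in the spirit of rr's (the real filter lives in rr's sources; details such as comparing the full 64-bit $ip are glossed over):

#include <linux/filter.h>
#include <linux/seccomp.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/prctl.h>

static void install_untraced_filter(uintptr_t untraced_ip) {
  struct sock_filter insns[] = {
    /* Load the low word of the syscall instruction pointer. */
    BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
             offsetof(struct seccomp_data, instruction_pointer)),
    /* Syscalls made from the untraced entry point pass through... */
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (uint32_t)untraced_ip, 0, 1),
    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
    /* ...everything else traps to rr (PTRACE_EVENT_SECCOMP). */
    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRACE),
  };
  struct sock_fprog prog = { sizeof(insns) / sizeof(insns[0]), insns };
  prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
}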

seccomp-bpf traps generate PTRACE_EVENT_SECCOMP in tracer process.

rr can then PTRACE_SYSCALL the tracee into traced syscall.

Problem: buffered syscalls don't trap to rr, by design. But may-block syscalls (e.g. waitpid()) require rr to schedule another task.

perf events to the rescue: "descheduled" event

Set event to fire on tracee context switch. Event traps to rr.

Buffered syscall blocks → context switch → rr trap

Generating desched events

  • In tracee, perf_event_open() fd for context-switch counter
  • Set event "sample period" to 1 (i.e. next context switch) just before buffered syscall
  • Disarm event just after buffered syscall.
  • tracee is sent SIGSYS if it is context-switched during a buffered syscall (sketched below)
src/preload/preload.c
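
A hedged sketch of the tracee-side helpers: the fd is opened once per task, and only arm/disarm run on the hot path around each buffered syscall:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/perf_event.h>
#include <signal.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_desched_counter(void) {
  struct perf_event_attr attr;
  memset(&attr, 0, sizeof(attr));
  attr.size = sizeof(attr);
  attr.type = PERF_TYPE_SOFTWARE;
  attr.config = PERF_COUNT_SW_CONTEXT_SWITCHES;
  attr.sample_period = 1;  /* fire on the very next context switch */
  attr.disabled = 1;
  int fd = syscall(SYS_perf_event_open, &attr, 0 /* this task */, -1, -1, 0);
  fcntl(fd, F_SETFL, O_ASYNC);
  fcntl(fd, F_SETOWN, getpid());
  fcntl(fd, F_SETSIG, SIGSYS);  /* the "desched" signal */
  return fd;
}

/* Bracket each buffered syscall with these. */
static void arm_desched(int fd) { ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); }
static void disarm_desched(int fd) { ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); }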

Saved traces

Traces are saved to $XDG_DATA_HOME/rr, or ~/.rr.

Stored on disk uncompressed. Trace compression is planned.

Trace directory contents

  • args_env: command-line args and environment pairs used to execvpe() initial tracee
  • events: sequence of syscalls, signals, and various other execution events
  • mmaps: metadata about mmap'd files
  • data/data_header: all recorded data, along with metadata about when it was recorded
  • version: trace format version number

Replayer implementation

Emulate most syscalls using trace data.

Actually execute a small number.

Built around PTRACE_SYSEMU

SYSCALL runs tracee to syscall, executes it.

SYSEMU runs to syscall, doesn't execute it. rr replays side effects.
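
A hedged sketch of one emulation step, assuming a 32-bit tracee; the recorded result goes in the return register, and outparam data is written back separately:

#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/wait.h>

/* Run to the next syscall without executing it, then fake its result. */
static void emulate_one_syscall(pid_t tid, long recorded_result) {
  ptrace(PTRACE_SYSEMU, tid, 0, 0);
  waitpid(tid, 0, 0);  /* stopped at syscall entry; syscall will be skipped */
  struct user_regs_struct regs;
  ptrace(PTRACE_GETREGS, tid, 0, &regs);
  regs.eax = recorded_result;  /* poke the recorded result into eax */
  ptrace(PTRACE_SETREGS, tid, 0, &regs);
}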

Replaying time-slice interrupts, in theory

Program the instructions-retired counter to interrupt after the recorded count t of instructions.

Tracee stops at t.

Replaying time-slice interrupts, in practice

  • have to use retired-conditional-branch counter (RBC)
  • RBC interrupts aren't precise. Can overshoot by up to 70 branches.
  • RBC counter value doesn't uniquely identify a point in execution (unlike retired-insn counter value)

Finding execution target, in practice

  • program RBC interrupt for target-rbc - SKID_SIZE
  • after RBC interrupt, set breakpoint on target $ip to avoid single-stepping when possible
  • when breakpoint hit, compare RBC value and register files to guess if at execution target. If RBC and regs match what was recorded, done (sketched below).
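
In outline, the loop looks like this (hedged pseudocode with hypothetical helpers; the real logic is in src/replayer.cc):

/* Advance the tracee to (target_rbc, recorded regs). */
program_rbc_interrupt(target_rbc - SKID_SIZE);
continue_tracee();                 /* stops somewhere short of the target */
set_breakpoint(target_ip);
for (;;) {
  continue_tracee();               /* runs to the next hit of target_ip */
  if (read_rbc() == target_rbc && regs_match(recorded_regs))
    break;                         /* best available guess: at the target */
}
remove_breakpoint(target_ip);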

To reiterate, this is not sound.

Deterministic signals were raised by program execution. For example, *NULL = 42;

Replayed "naturally" in the course of execution.

Async signals were raised externally at some execution point during recording.

Replay to that execution point just as for time-slice interrupts.

Replay signal delivery by emulating

If there was a sighandler, restore recorded sigframe and registers at sighandler entry.

Otherwise, nothing else to do.

Replaying buffered syscalls

Read saved buffer from trace.

Replay each syscall as normal, but restore outparam data from records in the read buffer.

Debugger interface

Common commands supported.

c, s, si, b, bt, watch, info regs, thr, info thr ...

(rr) call foo() can cause replay divergence.

So you're not allowed to do it … for now. Support coming.

Small stub translates from and to gdb remote protocol.

Then passes debugger requests up to rr replayer.

src/debugger_gdb.cc

Replayer fulfills requests using ptrace() or cached data.

And resumes tracee execution when asked.

src/replayer.cc

Breakpoints, int $3, stepi, watchpoints all raise SIGTRAP.

The $ip, the breakpoint table, the gdb request, and $DR6 are used to decode the trap.

src/replayer.cc
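
A software breakpoint, for example, is just a patched-in int3 byte; a hedged sketch:

#include <stdint.h>
#include <sys/ptrace.h>
#include <sys/types.h>

/* Plant an int3 at |ip|; return the original word so it can be restored. */
static long set_sw_breakpoint(pid_t tid, uintptr_t ip) {
  long orig = ptrace(PTRACE_PEEKTEXT, tid, (void*)ip, 0);
  long patched = (orig & ~0xffL) | 0xcc;  /* 0xcc = int $3 */
  ptrace(PTRACE_POKETEXT, tid, (void*)ip, (void*)patched);
  return orig;
}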

Future work

Checkpointing

Make a "deep fork" of tracee tree during replay.

Run code (or whatever) in copied tree, return to original.

Omniscient debugging (aka chroniclerr)

Use chronicle-style instrumentation to generate execution DB.

Query state changes in DB.

CHESS-style execution search; targeted recording

At each scheduling decision, make a checkpoint.

If execution reaches bad state, done. Else, resume checkpoint.

Other projects

  • Copy traces across machines
  • Integrate hardware shared-memory multithreading recorder like QuickRec
  • Record ptrace API; rr record rr record
  • Handle GPU drivers (NVIDIA, ATI, ...)
  • Port to Darwin kernel
  • Port to Windows NT kernel
  • ARM port not possible with current generation of chips

Thanks from the rr team!

Appendix: rr for RnR people

Release 1.2 available today at

rr-project.org

Use cases

  • Run on modern, commodity hardware and software: dnf install rr; rr record
  • Aspire to general tool, focus on Firefox initially
  • Record nondeterministic test failures at scale (e.g., Firefox build/test infra), debug offline
  • "Super-debugger" for local development
  • Search execution space to actively find bugs

Design concerns

  • Commodity HW → only record single HW thread (for now!)
  • Commodity SW → stick to higher-level userspace APIs (e.g., ptrace, PEBS)
  • Record tests at scale → record perf must be "economical", but not mission-critical
  • "Super-debugger" → the usual, plus queries over execution history; pretty fast replay
  • Search exe space → flexible scheduling and checkpointing

rr recorder overview

  • Record "applications" consisting of linux tasks
  • Schedule CPU slices by programming precise counter interrupt (retired branches, RBC) for k
  • Time slice is special case of signal: time-slice recorded as (0, rec-RBC), signals (signum, rec-RBC)
  • Record kernel-created signal stack
  • Syscall "outparams" and rdtsc generate trace traps; results saved to log
  • Plus a "faster" mode we'll cover later

Trade-off: scheduling from userspace

  • While it's great to fully control scheduling ...
  • ... we have to approximate timeslices; can be unfair
  • ... interactive programs don't "feel" native; rr has its own heuristics
  • ... can be slower
  • Future work is to have the option of both

Headache: kernel writes racing with userspace

  • rr doesn't (can't efficiently) record reads/writes of memory shared outside of tracee application.
  • But there are still hazards with the kernel:
  • ... kernel-write/task-read hazard on syscall outparam buffers → rr replaces user buffers with scratch and serializes writes
  • ... kernel-write/task-read on random futexes, e.g. CLONE_CHILD_CLEARTID → no good solution yet; usleep…

rr replayer overview

  • Replay signal/time-slice (signum, rec-RBC) by programming interrupt for rec-RBC
  • Emulate signals by restoring signal stack and regs
  • Emulate syscalls by restoring outparams where possible
  • Execute non-emulatable clone/mmap/et al. as required
  • Serve debugger requests (maybe covered later)

Replayer headache: slack in counter interrupts

  • Interrupt programmed for RBC = k may actually fire at up to RBC = k + slack
  • (Slack empirically seen to be >= 70 branches)
  • So we program interrupt for RBC = k - slack and then advance by breakpoint+stepi
  • "At target" when rec-RBC == rep-RBC and [rec-regs] == [rep-regs]
  • Replay target therefore technically indeterminate

Recorder "fast mode": syscall buffering

  • ptrace traps are slow
  • Idea: avoid them when possible by buffering log data in tracee task
  • Implementation: LD_PRELOAD a helper library with hooks for common syscalls (read, write, gettimeofday, etc.)
  • Hook makes fast untraced syscall, saves outparams in task-local buffer
  • Flush buffer at traced event (including buffer overflow)

Headache: many syscalls made internally in glibc

  • Those syscalls can't be wrapped by usual approach of interposing exported symbol using LD_PRELOAD
  • Solution: monkeypatch __kernel_vsyscall() in vdso.
  • Syscalls directly made through int $0x80 still can't be buffered.
  • We hope this terrible hack evolves into kernel support.

Headache: buffering syscalls that may block

  • read/write/… may block if buffer empty/full/…
  • But, untraced syscall from wrapper means no trap to rr for scheduling arbitration
  • If another tracee is blocked too, then may deadlock
  • Solution: libc wrapper programs perf_event interrupt triggered by next context-switch of task
  • If the syscall blocks, task is switched out, and rr tracer gets interrupt (SIGIO from perf_event)

Fun debugging tricks

  • Save register / RBC info at all events, verify in replay
  • Generate memory checksums at selected events, verify in replay
  • LLVM pass to add "execution path logging" (Bell-Larus): poor man's log of retired branches. Save to "magic fd" in recording, verify in replay.
  • Hack replayer itself to efficiently log arbitrary info at arbitrary points
rr-4.1.0/include/000077500000000000000000000000001265436462100135735ustar00rootroot00000000000000rr-4.1.0/include/rr/000077500000000000000000000000001265436462100142165ustar00rootroot00000000000000rr-4.1.0/include/rr/rr.h000066400000000000000000000025511265436462100150150ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_H_ #define RR_H_ /** * rr tracees can write data to this special fd that they want * verified across record/replay. When it's written in recording, rr * saves the data. During replay, the data are checked against the * recorded data. * * Tracees using this interface should take care that the buffers * storing the data are either not racy, or are synchronized by the * tracee. * * To simplify things, we make this a valid fd opened to /dev/null during * recording. * * Tracees may close this fd, or dup() something over it, etc. If that happens, * it will lose its magical properties. */ #define RR_MAGIC_SAVE_DATA_FD 999 /** * rr uses this fd to ensure the tracee has access to the original root * directory after a chroot(). Tracee close()es of this fd will be silently * ignored, and tracee dup()s to this fd will fail with EBADF. */ #define RR_RESERVED_ROOT_DIR_FD 1000 /** * The preferred fd that rr uses to control tracee desched. Some software * (e.g. the chromium IPC code) wants to have the first few fds all to itself, * so we need to stay above some floor. Tracee close()es of the fd that is * actually assigned will be silently ignored, and tracee dup()s to that fd will * fail with EBADF. */ #define RR_DESCHED_EVENT_FLOOR_FD 100 #endif /* RR_H_ */ rr-4.1.0/rr.spec000066400000000000000000000033561265436462100134560ustar00rootroot00000000000000Buildroot: @CPACK_BINARY_DIR@/_CPack_Packages/@CPACK_SYSTEM_NAME@/RPM/@CPACK_PACKAGE_FILE_NAME@ Summary: Lightweight tool for recording and replaying execution of applications (trees of processes and threads) Name: @CPACK_PACKAGE_NAME@ Version: @CPACK_PACKAGE_VERSION@ Release: @CPACK_RPM_PACKAGE_RELEASE@ License: @CPACK_RPM_PACKAGE_LICENSE@ Group: Development/Debuggers Vendor: @CPACK_PACKAGE_VENDOR@ Prefix: @CPACK_PACKAGING_INSTALL_PREFIX@ @CPACK_RPM_PACKAGE_REQUIRES@ %define _rpmdir @CPACK_BINARY_DIR@/_CPack_Packages/@CPACK_SYSTEM_NAME@/RPM %define _rpmfilename @CPACK_PACKAGE_FILE_NAME@.rpm %define _unpackaged_files_terminate_build 0 %define _topdir @CPACK_BINARY_DIR@/_CPack_Packages/@CPACK_SYSTEM_NAME@/RPM %description rr is a lightweight tool for recording and replaying execution of applications (trees of processes and threads). For more information, please visit http://rr-project.org # This is a shortcutted spec file generated by CMake RPM generator # we skip _install step because CPack does that for us. # We do only save CPack installed tree in _prepr # and then restore it in build. %prep mv $RPM_BUILD_ROOT @CPACK_BINARY_DIR@/_CPack_Packages/@CPACK_SYSTEM_NAME@/RPM/tmpBBroot %install if [ -e $RPM_BUILD_ROOT ]; then rm -Rf $RPM_BUILD_ROOT fi mv "@CPACK_BINARY_DIR@/_CPack_Packages/@CPACK_SYSTEM_NAME@/RPM/tmpBBroot" $RPM_BUILD_ROOT %files %defattr(-,root,root,-) @CPACK_PACKAGING_INSTALL_PREFIX@/lib/* @CPACK_PACKAGING_INSTALL_PREFIX@/bin/rr @CPACK_PACKAGING_INSTALL_PREFIX@/bin/exec_stub* @CPACK_PACKAGING_INSTALL_PREFIX@/bin/rr_page* @CPACK_PACKAGING_INSTALL_PREFIX@/bin/signal-rr-recording.sh %changelog * Tue Jun 25 2013 Chris Jones - - Initial build. 
rr-4.1.0/scripts/000077500000000000000000000000001265436462100136375ustar00rootroot00000000000000rr-4.1.0/scripts/signal-rr-recording.sh000077500000000000000000000010561265436462100200500ustar00rootroot00000000000000#!/usr/bin/bash signal=$1 if [[ "$signal" == "" ]]; then echo "Usage: $0 " >&2 echo "Sends to all processes being recorded by rr" >&2 exit 1 fi function signal_descendants { pid=$1 for child in `ps -o pid= --ppid $pid`; do echo Sending $signal to $child kill -s $signal $child signal_descendants $child done } for rr_pid in `pidof rr` ; do if cat /proc/$rr_pid/cmdline | tr '\0' '\n' | head -n2 | tail -n1 | grep -qz '\(^record$\)\|/' ; then signal_descendants $rr_pid fi done rr-4.1.0/src/000077500000000000000000000000001265436462100127375ustar00rootroot00000000000000rr-4.1.0/src/AddressSpace.cc000066400000000000000000001321741265436462100156170ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "AddressSpace" #include "AddressSpace.h" #include #include #include #include #include #include "rr/rr.h" #include "preload/preload_interface.h" #include "AutoRemoteSyscalls.h" #include "log.h" #include "RecordSession.h" #include "Session.h" #include "task.h" using namespace rr; using namespace std; /*static*/ const uint8_t AddressSpace::breakpoint_insn; void HasTaskSet::insert_task(Task* t) { LOG(debug) << "adding " << t->tid << " to task set " << this; tasks.insert(t); } void HasTaskSet::erase_task(Task* t) { LOG(debug) << "removing " << t->tid << " from task group " << this; tasks.erase(t); } /** * Advance *str to skip leading blank characters. */ static const char* trim_leading_blanks(const char* str) { const char* trimmed = str; while (isblank(*trimmed)) { ++trimmed; } return trimmed; } /** * The following helper is used to iterate over a tracee's memory * map. */ class KernelMapIterator { public: KernelMapIterator(Task* t) : t(t) { char maps_path[PATH_MAX]; sprintf(maps_path, "/proc/%d/maps", t->tid); ASSERT(t, (maps_file = fopen(maps_path, "r"))) << "Failed to open " << maps_path; ++*this; } ~KernelMapIterator() { if (maps_file) { fclose(maps_file); } } // It's very important to keep in mind that btrfs files can have the wrong // device number! const KernelMapping& current(string* raw_line = nullptr) { if (raw_line) { *raw_line = this->raw_line; } return km; } bool at_end() { return !maps_file; } void operator++(); private: Task* t; FILE* maps_file; string raw_line; KernelMapping km; }; void KernelMapIterator::operator++() { char line[PATH_MAX * 2]; if (!fgets(line, sizeof(line), maps_file)) { fclose(maps_file); maps_file = nullptr; return; } uint64_t start, end, offset, inode; int dev_major, dev_minor; char flags[32]; int chars_scanned; int nparsed = sscanf(line, "%" SCNx64 "-%" SCNx64 " %31s %" SCNx64 " %x:%x %" SCNu64 " %n", &start, &end, flags, &offset, &dev_major, &dev_minor, &inode, &chars_scanned); ASSERT(t, 8 /*number of info fields*/ == nparsed || 7 /*num fields if name is blank*/ == nparsed); // trim trailing newline, if any int last_char = strlen(line) - 1; if (line[last_char] == '\n') { line[last_char] = 0; } raw_line = line; const char* name = trim_leading_blanks(line + chars_scanned); #if defined(__i386__) if (start > numeric_limits::max() || end > numeric_limits::max() || strcmp(name, "[vsyscall]") == 0) { // We manually read the exe link here because // this helper is used to set // |t->vm()->exe_image()|, so we can't rely on // that being correct yet. 
char proc_exe[PATH_MAX]; char exe[PATH_MAX]; snprintf(proc_exe, sizeof(proc_exe), "/proc/%d/exe", t->tid); readlink(proc_exe, exe, sizeof(exe)); FATAL() << "Sorry, tracee " << t->tid << " has x86-64 image " << exe << " and that's not supported with a 32-bit rr."; } #endif int prot = (strchr(flags, 'r') ? PROT_READ : 0) | (strchr(flags, 'w') ? PROT_WRITE : 0) | (strchr(flags, 'x') ? PROT_EXEC : 0); int f = (strchr(flags, 'p') ? MAP_PRIVATE : 0) | (strchr(flags, 's') ? MAP_SHARED : 0); km = KernelMapping(start, end, name, MKDEV(dev_major, dev_minor), inode, prot, f, offset); } KernelMapping AddressSpace::read_kernel_mapping(Task* t, remote_ptr addr) { MemoryRange range(addr, 1); for (KernelMapIterator it(t); !it.at_end(); ++it) { const KernelMapping& km = it.current(); if (km.contains(range)) { return km; } } return KernelMapping(); } /** * Cat the /proc/[t->tid]/maps file to stdout, line by line. */ static void print_process_mmap(Task* t) { for (KernelMapIterator it(t); !it.at_end(); ++it) { string line; it.current(&line); cerr << line << '\n'; } } AddressSpace::~AddressSpace() { session_->on_destroy(this); } void AddressSpace::after_clone() { allocate_watchpoints(); } static remote_ptr find_rr_vdso(Task* t, size_t* len) { for (KernelMapIterator it(t); !it.at_end(); ++it) { auto& km = it.current(); if (km.fsname() == "[vdso]") { *len = km.size(); ASSERT(t, uint32_t(*len) == *len) << "VDSO more than 4GB???"; return km.start(); } } ASSERT(t, false) << "rr VDSO not found?"; return nullptr; } static uint32_t find_offset_of_syscall_instruction_in(SupportedArch arch, uint8_t* vdso_data, size_t vdso_len) { auto instruction = syscall_instruction(arch); for (uint32_t i = 1; i < vdso_len - instruction.size(); ++i) { if (memcmp(vdso_data + i, instruction.data(), instruction.size()) == 0) { return i; } } return 0; } uint32_t AddressSpace::offset_to_syscall_in_vdso[SupportedArch_MAX + 1]; remote_code_ptr AddressSpace::find_syscall_instruction(Task* t) { SupportedArch arch = t->arch(); if (!offset_to_syscall_in_vdso[arch]) { auto vdso_data = t->read_mem(vdso().start().cast(), vdso().size()); offset_to_syscall_in_vdso[arch] = find_offset_of_syscall_instruction_in( arch, vdso_data.data(), vdso_data.size()); ASSERT(t, offset_to_syscall_in_vdso[arch]) << "No syscall instruction found in VDSO"; } return remote_code_ptr( (vdso().start().cast() + offset_to_syscall_in_vdso[arch]) .as_int()); } static string find_rr_page_file(Task* t) { string path = exe_directory() + "rr_page_"; switch (t->arch()) { case x86: path += "32"; break; case x86_64: path += "64"; break; default: ASSERT(t, false) << "Unknown architecture"; return path; } if (!t->session().is_recording()) { path += "_replay"; } return path; } void AddressSpace::map_rr_page(Task* t) { int prot = PROT_EXEC | PROT_READ; int flags = MAP_PRIVATE | MAP_FIXED; struct stat fstat; string file_name; { AutoRemoteSyscalls remote(t); SupportedArch arch = t->arch(); string path = find_rr_page_file(t); AutoRestoreMem child_path(remote, path.c_str()); // skip leading '/' since we want the path to be relative to the root fd int child_fd = remote.infallible_syscall(syscall_number_for_openat(arch), RR_RESERVED_ROOT_DIR_FD, child_path.get() + 1, O_RDONLY); remote.infallible_mmap_syscall(rr_page_start(), rr_page_size(), prot, flags, child_fd, 0); fstat = t->stat_fd(child_fd); file_name = t->file_name_of_fd(child_fd); remote.infallible_syscall(syscall_number_for_close(arch), child_fd); if (t->session().is_recording()) { // brk() will not have been called yet so the brk area 
is empty. brk_start = brk_end = remote.infallible_syscall(syscall_number_for_brk(arch), 0); } } map(rr_page_start(), rr_page_size(), prot, flags, 0, file_name, fstat.st_dev, fstat.st_ino); traced_syscall_ip_ = rr_page_traced_syscall_ip(t->arch()); privileged_traced_syscall_ip_ = rr_page_privileged_traced_syscall_ip(t->arch()); } template static vector read_auxv_arch(Task* t) { auto stack_ptr = t->regs().sp().cast(); auto argc = t->read_mem(stack_ptr); stack_ptr += argc + 1; // Check final NULL in argv auto null_ptr = t->read_mem(stack_ptr); assert(null_ptr == 0); stack_ptr++; // Should now point to envp while (0 != t->read_mem(stack_ptr)) { stack_ptr++; } stack_ptr++; // should now point to ELF Auxiliary Table vector result; while (true) { auto pair_vec = t->read_mem(stack_ptr, 2); stack_ptr += 2; typename Arch::unsigned_word pair[2] = { pair_vec[0], pair_vec[1] }; result.resize(result.size() + sizeof(pair)); memcpy(result.data() + result.size() - sizeof(pair), pair, sizeof(pair)); if (pair[0] == 0) { break; } } return result; } static vector read_auxv(Task* t) { RR_ARCH_FUNCTION(read_auxv_arch, t->arch(), t); } void AddressSpace::save_auxv(Task* t) { saved_auxv_ = read_auxv(t); } void AddressSpace::post_exec_syscall(Task* t) { // First locate a syscall instruction we can use for remote syscalls. traced_syscall_ip_ = find_syscall_instruction(t); privileged_traced_syscall_ip_ = nullptr; // Now remote syscalls work, we can open_mem_fd. t->open_mem_fd(); // Now we can set up the "rr page" at its fixed address. This gives // us traced and untraced syscall instructions at known, fixed addresses. map_rr_page(t); } void AddressSpace::brk(remote_ptr addr, int prot) { LOG(debug) << "brk(" << addr << ")"; remote_ptr old_brk = ceil_page_size(brk_end); remote_ptr new_brk = ceil_page_size(addr); if (old_brk < new_brk) { map(old_brk, new_brk - old_brk, prot, MAP_ANONYMOUS | MAP_PRIVATE, 0, "[heap]", KernelMapping::NO_DEVICE, KernelMapping::NO_INODE); } else { unmap(new_brk, old_brk - new_brk); } brk_end = addr; } void AddressSpace::dump() const { fprintf(stderr, " (heap: %p-%p)\n", (void*)brk_start.as_int(), (void*)brk_end.as_int()); for (auto it = mem.begin(); it != mem.end(); ++it) { const KernelMapping& m = it->second.map; fprintf(stderr, "%s\n", m.str().c_str()); } } SupportedArch AddressSpace::arch() const { return (*task_set().begin())->arch(); } TrapType AddressSpace::get_breakpoint_type_for_retired_insn( remote_code_ptr ip) { remote_code_ptr addr = ip.decrement_by_bkpt_insn_length(SupportedArch::x86); return get_breakpoint_type_at_addr(addr); } TrapType AddressSpace::get_breakpoint_type_at_addr(remote_code_ptr addr) { auto it = breakpoints.find(addr); return it == breakpoints.end() ? 
TRAP_NONE : it->second.type(); } bool AddressSpace::is_breakpoint_in_private_read_only_memory( remote_code_ptr addr) { for (const auto& m : maps_starting_at(addr.to_data_ptr())) { if (m.map.start() >= addr.increment_by_bkpt_insn_length(arch()).to_data_ptr()) { break; } if ((m.map.prot() & PROT_WRITE) || (m.map.flags() & MAP_SHARED)) { return false; } } return true; } void AddressSpace::replace_breakpoints_with_original_values( uint8_t* dest, size_t length, remote_ptr addr) { for (auto& it : breakpoints) { remote_ptr bkpt_location = it.first.to_data_ptr(); remote_ptr start = max(addr, bkpt_location); remote_ptr end = min(addr + length, bkpt_location + it.second.data_length()); if (start < end) { memcpy(dest + (start - addr), it.second.original_data() + (start - bkpt_location), end - start); } } } static void remove_range(set& ranges, const MemoryRange& range) { auto start = ranges.lower_bound(range); auto end = start; auto prev_end = start; while (end != ranges.end() && end->start() < range.end()) { prev_end = end; ++end; } if (start == end) { return; } MemoryRange start_range = *start; MemoryRange end_range = *prev_end; ranges.erase(start, end); if (start_range.start() < range.start()) { ranges.insert(MemoryRange(start_range.start(), range.start())); } if (range.end() < end_range.end()) { ranges.insert(MemoryRange(range.end(), end_range.end())); } } static void add_range(set& ranges, const MemoryRange& range) { // Remove overlapping ranges remove_range(ranges, range); ranges.insert(range); // We could coalesce adjacent ranges, but there's probably no need. } KernelMapping AddressSpace::map(remote_ptr addr, size_t num_bytes, int prot, int flags, off64_t offset_bytes, const string& fsname, dev_t device, ino_t inode, const KernelMapping* recorded_map, TraceWriter::MappingOrigin origin) { LOG(debug) << "mmap(" << addr << ", " << num_bytes << ", " << HEX(prot) << ", " << HEX(flags) << ", " << HEX(offset_bytes); num_bytes = ceil_page_size(num_bytes); KernelMapping m(addr, addr + num_bytes, fsname, device, inode, prot, flags, offset_bytes); if (!num_bytes) { return m; } remove_range(dont_fork, MemoryRange(addr, num_bytes)); // The mmap() man page doesn't specifically describe // what should happen if an existing map is // "overwritten" by a new map (of the same resource). // In testing, the behavior seems to be as if the // overlapping region is unmapped and then remapped // per the arguments to the second call. unmap_internal(addr, num_bytes); const KernelMapping& actual_recorded_map = recorded_map ? *recorded_map : m; map_and_coalesce(m, actual_recorded_map); if ((prot & PROT_EXEC) && (fsname.find(SYSCALLBUF_LIB_FILENAME) != string::npos || fsname.find(SYSCALLBUF_LIB_FILENAME_32) != string::npos)) { syscallbuf_lib_start_ = addr; syscallbuf_lib_end_ = addr + num_bytes; } // During an emulated exec, we will explicitly map in a (copy of) the VDSO // at the recorded address. if (actual_recorded_map.is_vdso()) { vdso_start_addr = addr; } return m; } template void AddressSpace::at_preload_init_arch(Task* t) { auto params = t->read_mem( remote_ptr >(t->regs().arg1())); ASSERT(t, t->session().as_record()->use_syscall_buffer() == params.syscallbuf_enabled) << "Tracee thinks syscallbuf is " << (params.syscallbuf_enabled ? "en" : "dis") << "abled, but tracer thinks " << (t->session().as_record()->use_syscall_buffer() ? 
"en" : "dis") << "abled"; if (!params.syscallbuf_enabled) { return; } monkeypatch_state->patch_at_preload_init(t); } void AddressSpace::at_preload_init(Task* t) { ASSERT(t, syscallbuf_lib_start_) << "should have found preload library already"; RR_ARCH_FUNCTION(at_preload_init_arch, t->arch(), t); } const AddressSpace::Mapping& AddressSpace::mapping_of( remote_ptr addr) const { MemoryRange range(floor_page_size(addr), page_size()); auto it = mem.find(range); assert(it != mem.end()); assert(it->second.map.contains(range)); return it->second; } bool AddressSpace::has_mapping(remote_ptr addr) const { if (addr + page_size() < addr) { // Assume the last byte in the address space is never mapped; avoid overflow return false; } MemoryRange m(floor_page_size(addr), page_size()); auto it = mem.find(m); return it != mem.end() && it->first.contains(m); } void AddressSpace::protect(remote_ptr addr, size_t num_bytes, int prot) { LOG(debug) << "mprotect(" << addr << ", " << num_bytes << ", " << HEX(prot) << ")"; MemoryRange last_overlap; auto protector = [this, prot, &last_overlap](const Mapping& mm, const MemoryRange& rem) { LOG(debug) << " protecting (" << rem << ") ..."; Mapping m = move(mm); mem.erase(m.map); // PROT_GROWSDOWN means that if this is a grows-down segment // (which for us means "stack") then the change should be // extended to the start of the segment. // We don't try to handle the analogous PROT_GROWSUP, because we // don't understand the idea of a grows-up segment. remote_ptr new_start; if ((m.map.start() < rem.start()) && (prot & PROT_GROWSDOWN)) { new_start = m.map.start(); LOG(debug) << " PROT_GROWSDOWN: expanded region down to " << new_start; } else { new_start = rem.start(); } LOG(debug) << " erased (" << m.map << ")"; // If the first segment we protect underflows the // region, remap the underflow region with previous // prot. if (m.map.start() < new_start) { Mapping underflow( m.map.subrange(m.map.start(), rem.start()), m.recorded_map.subrange(m.recorded_map.start(), rem.start())); mem[underflow.map] = underflow; } // Remap the overlapping region with the new prot. remote_ptr new_end = min(rem.end(), m.map.end()); int new_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC); Mapping overlap( m.map.subrange(new_start, new_end).set_prot(new_prot), m.recorded_map.subrange(new_start, new_end).set_prot(new_prot)); mem[overlap.map] = overlap; last_overlap = overlap.map; // If the last segment we protect overflows the // region, remap the overflow region with previous // prot. if (rem.end() < m.map.end()) { Mapping overflow(m.map.subrange(rem.end(), m.map.end()), m.recorded_map.subrange(rem.end(), m.map.end())); mem[overflow.map] = overflow; } }; for_each_in_range(addr, num_bytes, protector, ITERATE_CONTIGUOUS); if (last_overlap.size()) { // All mappings that we altered which might need coalescing // are adjacent to |last_overlap|. 
coalesce_around(mem.find(last_overlap)); } } void AddressSpace::fixup_mprotect_growsdown_parameters(Task* t) { ASSERT(t, !(t->regs().arg3() & PROT_GROWSUP)); if (t->regs().arg3() & PROT_GROWSDOWN) { Registers r = t->regs(); if (r.arg1() == floor_page_size(r.arg1()) && has_mapping(r.arg1())) { auto& km = mapping_of(r.arg1()).map; if (km.flags() & MAP_GROWSDOWN) { auto new_start = km.start(); r.set_arg2(remote_ptr(r.arg1()) + size_t(r.arg2()) - new_start); r.set_arg1(new_start); r.set_arg3(r.arg3() & ~PROT_GROWSDOWN); t->set_regs(r); } } } } void AddressSpace::remap(remote_ptr old_addr, size_t old_num_bytes, remote_ptr new_addr, size_t new_num_bytes) { LOG(debug) << "mremap(" << old_addr << ", " << old_num_bytes << ", " << new_addr << ", " << new_num_bytes << ")"; auto mr = mapping_of(old_addr); const KernelMapping& m = mr.map; old_num_bytes = ceil_page_size(old_num_bytes); unmap_internal(old_addr, old_num_bytes); if (0 == new_num_bytes) { return; } auto it = dont_fork.lower_bound(MemoryRange(old_addr, old_num_bytes)); if (it != dont_fork.end() && it->start() < old_addr + old_num_bytes) { // mremap fails if some but not all pages are marked DONTFORK assert(*it == MemoryRange(old_addr, old_num_bytes)); remove_range(dont_fork, MemoryRange(old_addr, old_num_bytes)); add_range(dont_fork, MemoryRange(new_addr, new_num_bytes)); } else { remove_range(dont_fork, MemoryRange(old_addr, old_num_bytes)); remove_range(dont_fork, MemoryRange(new_addr, new_num_bytes)); } remote_ptr new_end = new_addr + new_num_bytes; map_and_coalesce(m.set_range(new_addr, new_end), mr.recorded_map.set_range(new_addr, new_end)); } void AddressSpace::remove_breakpoint(remote_code_ptr addr, TrapType type) { auto it = breakpoints.find(addr); if (it == breakpoints.end() || it->second.unref(type) > 0) { return; } destroy_breakpoint(it); } bool AddressSpace::add_breakpoint(remote_code_ptr addr, TrapType type) { auto it = breakpoints.find(addr); if (it == breakpoints.end()) { uint8_t overwritten_data; // Grab a random task from the VM so we can use its // read/write_mem() helpers. 
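// The steps below: (1) save the byte at |addr| so remove_breakpoint()
// can restore it later, (2) poke breakpoint_insn (0xCC, int3) in its
// place. read_bytes_fallible() lets us fail gracefully if |addr| is
// unmapped instead of asserting.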
Task* t = *task_set().begin(); if (sizeof(overwritten_data) != t->read_bytes_fallible(addr.to_data_ptr(), sizeof(overwritten_data), &overwritten_data)) { return false; } t->write_mem(addr.to_data_ptr(), breakpoint_insn); auto it_and_is_new = breakpoints.insert(make_pair(addr, Breakpoint())); assert(it_and_is_new.second); it_and_is_new.first->second.overwritten_data = overwritten_data; it = it_and_is_new.first; } it->second.ref(type); return true; } void AddressSpace::remove_all_breakpoints() { while (!breakpoints.empty()) { destroy_breakpoint(breakpoints.begin()); } } int AddressSpace::access_bits_of(WatchType type) { switch (type) { case WATCH_EXEC: return EXEC_BIT; case WATCH_WRITE: return WRITE_BIT; case WATCH_READWRITE: return READ_BIT | WRITE_BIT; default: FATAL() << "Unknown watchpoint type " << type; return 0; // not reached } } void AddressSpace::remove_watchpoint(remote_ptr addr, size_t num_bytes, WatchType type) { auto it = watchpoints.find(MemoryRange(addr, num_bytes)); if (it != watchpoints.end() && 0 == it->second.unwatch(access_bits_of(type))) { watchpoints.erase(it); } allocate_watchpoints(); } bool AddressSpace::add_watchpoint(remote_ptr addr, size_t num_bytes, WatchType type) { MemoryRange key(addr, num_bytes); auto it = watchpoints.find(key); if (it == watchpoints.end()) { auto it_and_is_new = watchpoints.insert(make_pair(key, Watchpoint(num_bytes))); assert(it_and_is_new.second); it = it_and_is_new.first; update_watchpoint_value(it->first, it->second); } it->second.watch(access_bits_of(type)); return allocate_watchpoints(); } void AddressSpace::save_watchpoints() { saved_watchpoints.push_back(watchpoints); } bool AddressSpace::restore_watchpoints() { assert(!saved_watchpoints.empty()); watchpoints = saved_watchpoints[saved_watchpoints.size() - 1]; saved_watchpoints.pop_back(); return allocate_watchpoints(); } bool AddressSpace::update_watchpoint_value(const MemoryRange& range, Watchpoint& watchpoint) { Task* t = *task_set().begin(); bool valid = true; vector value_bytes = watchpoint.value_bytes; for (size_t i = 0; i < value_bytes.size(); ++i) { value_bytes[i] = 0xFF; } remote_ptr addr = range.start(); size_t num_bytes = range.size(); while (num_bytes > 0) { ssize_t bytes_read = t->read_bytes_fallible( addr, num_bytes, value_bytes.data() + (addr - range.start())); if (bytes_read <= 0) { valid = false; // advance to next page and try to read more. We want to know // when the valid part of a partially invalid watchpoint changes. bytes_read = min(num_bytes, (floor_page_size(addr) + page_size()) - addr); } addr += bytes_read; num_bytes -= bytes_read; } bool changed = valid != watchpoint.valid || memcmp(value_bytes.data(), watchpoint.value_bytes.data(), value_bytes.size()) != 0; watchpoint.valid = valid; watchpoint.value_bytes = value_bytes; return changed; } void AddressSpace::update_watchpoint_values(remote_ptr start, remote_ptr end) { MemoryRange r(start, end); for (auto& it : watchpoints) { if (it.first.intersects(r) && update_watchpoint_value(it.first, it.second)) { it.second.changed = true; // We do nothing to track kernel reads of read-write watchpoints... 
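// (The |changed| flag set here is sticky: it is reported and cleared
// later by consume_watchpoint_changes(), so a change is never lost
// even if several writes land between checks.)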
} } } static int DR_WATCHPOINT(int n) { return 1 << n; } static bool watchpoint_triggered(uintptr_t debug_status, const vector& regs) { for (auto reg : regs) { if (debug_status & DR_WATCHPOINT(reg)) { return true; } } return false; } bool AddressSpace::notify_watchpoint_fired(uintptr_t debug_status) { bool triggered = false; for (auto& it : watchpoints) { if (((it.second.watched_bits() & WRITE_BIT) && update_watchpoint_value(it.first, it.second)) || ((it.second.watched_bits() & (READ_BIT | EXEC_BIT)) && watchpoint_triggered(debug_status, it.second.debug_regs_for_exec_read))) { it.second.changed = true; triggered = true; } } return triggered; } void AddressSpace::notify_written(remote_ptr addr, size_t num_bytes) { update_watchpoint_values(addr, addr + num_bytes); session()->accumulate_bytes_written(num_bytes); } void AddressSpace::remove_all_watchpoints() { watchpoints.clear(); allocate_watchpoints(); } void AddressSpace::unmap(remote_ptr addr, ssize_t num_bytes) { LOG(debug) << "munmap(" << addr << ", " << num_bytes << ")"; num_bytes = ceil_page_size(num_bytes); if (!num_bytes) { return; } remove_range(dont_fork, MemoryRange(addr, num_bytes)); return unmap_internal(addr, num_bytes); } void AddressSpace::unmap_internal(remote_ptr addr, ssize_t num_bytes) { LOG(debug) << "munmap(" << addr << ", " << num_bytes << ")"; auto unmapper = [this](const Mapping& mm, const MemoryRange& rem) { LOG(debug) << " unmapping (" << rem << ") ..."; Mapping m = move(mm); mem.erase(m.map); LOG(debug) << " erased (" << m.map << ") ..."; // If the first segment we unmap underflows the unmap // region, remap the underflow region. if (m.map.start() < rem.start()) { Mapping underflow(m.map.subrange(m.map.start(), rem.start()), m.recorded_map.subrange(m.map.start(), rem.start())); mem[underflow.map] = underflow; } // If the last segment we unmap overflows the unmap // region, remap the overflow region. 
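// Worked example: unmapping [0x2000,0x3000) out of a mapping
// [0x1000,0x4000) erases the whole Mapping, then re-inserts the
// underflow [0x1000,0x2000) (above) and the overflow [0x3000,0x4000)
// (below), each built via subrange() so prot/flags/file offsets stay
// consistent.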
if (rem.end() < m.map.end()) { Mapping overflow(m.map.subrange(rem.end(), m.map.end()), m.recorded_map.subrange(rem.end(), m.map.end())); mem[overflow.map] = overflow; } }; for_each_in_range(addr, num_bytes, unmapper); update_watchpoint_values(addr, addr + num_bytes); } void AddressSpace::advise(remote_ptr addr, ssize_t num_bytes, int advice) { LOG(debug) << "madvise(" << addr << ", " << num_bytes << ", " << advice << ")"; num_bytes = ceil_page_size(num_bytes); switch (advice) { case MADV_DONTFORK: add_range(dont_fork, MemoryRange(addr, num_bytes)); break; case MADV_DOFORK: remove_range(dont_fork, MemoryRange(addr, num_bytes)); break; default: break; } } void AddressSpace::did_fork_into(Task* t) { for (auto& range : dont_fork) { // During recording we execute MADV_DONTFORK so the forked child will // have had its dontfork areas unmapped by the kernel already if (!t->session().is_recording()) { AutoRemoteSyscalls remote(t); remote.infallible_syscall(syscall_number_for_munmap(remote.arch()), range.start(), range.size()); } t->vm()->unmap(range.start(), range.size()); } } static string strip_deleted(const string& s) { static const char deleted[] = " (deleted)"; ssize_t find_deleted = s.size() - (sizeof(deleted) - 1); if (s.find(deleted) == size_t(find_deleted)) { return s.substr(0, find_deleted); } return s; } enum HandleHeap { TREAT_HEAP_AS_ANONYMOUS, RESPECT_HEAP }; static bool normalized_file_names_equal(const KernelMapping& km1, const KernelMapping& km2, HandleHeap handle_heap) { if (km1.is_stack() || km2.is_stack()) { // The kernel seems to use "[stack:]" for any mapping area containing // thread |tid|'s stack pointer. When the thread exits, the next read of // the maps doesn't treat the area as stack at all. We don't want to track // thread exits, so if one of the mappings is a stack, skip the name // comparison. Device and inode numbers will still be checked. return true; } if (handle_heap == TREAT_HEAP_AS_ANONYMOUS && (km1.is_heap() || km2.is_heap())) { // The kernel's heuristics for treating an anonymous mapping as "[heap]" // are obscure. Just skip the name check. Device and inode numbers will // still be checked. return true; } // We don't track when a file gets deleted, so it's possible for the kernel // to have " (deleted)" when we don't. return strip_deleted(km1.fsname()) == strip_deleted(km2.fsname()); } /** * Return true iff |left| and |right| are located adjacently in memory * with the same metadata, and map adjacent locations of the same * underlying (real) device. 
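 *
 * Example: two MAP_PRIVATE mappings of the same file with identical
 * prot, [0x1000,0x2000) at file offset 0 and [0x2000,0x3000) at file
 * offset 0x1000, count as adjacent; change the second offset to
 * 0x3000 (or give the ranges different prot) and they do not.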
*/ static bool is_adjacent_mapping(const KernelMapping& mleft, const KernelMapping& mright, HandleHeap handle_heap, int32_t flags_to_check = 0xFFFFFFFF) { if (mleft.end() != mright.start()) { LOG(debug) << " (not adjacent in memory)"; return false; } if (((mleft.flags() ^ mright.flags()) & flags_to_check) || mleft.prot() != mright.prot()) { LOG(debug) << " (flags or prot differ)"; return false; } if (!normalized_file_names_equal(mleft, mright, handle_heap)) { LOG(debug) << " (not the same filename)"; return false; } if (mleft.device() != mright.device() || mleft.inode() != mright.inode()) { LOG(debug) << " (not the same device/inode)"; return false; } if (mleft.is_real_device() && mleft.file_offset_bytes() + off64_t(mleft.size()) != mright.file_offset_bytes()) { LOG(debug) << " (" << mleft.file_offset_bytes() << " + " << mleft.size() << " != " << mright.file_offset_bytes() << ": offsets into real device aren't adjacent)"; return false; } LOG(debug) << " adjacent!"; return true; } /** * If |*left_m| and |right_m| are adjacent (see * |is_adjacent_mapping()|), write a merged segment descriptor to * |*left_m| and return true. Otherwise return false. */ static bool try_merge_adjacent(KernelMapping* left_m, const KernelMapping& right_m) { if (is_adjacent_mapping(*left_m, right_m, TREAT_HEAP_AS_ANONYMOUS, KernelMapping::checkable_flags_mask)) { *left_m = KernelMapping(left_m->start(), right_m.end(), left_m->fsname(), left_m->device(), left_m->inode(), right_m.prot(), right_m.flags(), left_m->file_offset_bytes()); return true; } return false; } static dev_t normalized_device_number(const KernelMapping& m) { if (m.fsname().c_str()[0] != '/') { return m.device(); } // btrfs files can report the wrong device number in /proc//maps, so // restrict ourselves to checking whether the device number is != 0 if (m.device() != KernelMapping::NO_DEVICE) { return (dev_t)-1; } return m.device(); } static void assert_segments_match(Task* t, const KernelMapping& input_m, const KernelMapping& km) { KernelMapping m = input_m; string err; if (m.start() != km.start()) { err = "starts differ"; } else if (m.end() != km.end()) { err = "ends differ"; } else if (m.prot() != km.prot()) { err = "prots differ"; } else if ((m.flags() ^ km.flags()) & KernelMapping::checkable_flags_mask) { err = "flags differ"; } else if (!normalized_file_names_equal(m, km, TREAT_HEAP_AS_ANONYMOUS) && !(km.is_heap() && m.fsname() == "") && !(m.is_heap() && km.fsname() == "") && !km.is_vdso()) { // Due to emulated exec, the kernel may identify any of our anonymous maps // as [heap] (or not). // Kernels before 3.16 have a bug where any mapping at the original VDSO // address is marked [vdso] even if the VDSO was unmapped and replaced by // something else, so if the kernel reports [vdso] it may be spurious and // we skip this check. See kernel commit // a62c34bd2a8a3f159945becd57401e478818d51c. 
err = "filenames differ"; } else if (normalized_device_number(m) != normalized_device_number(km)) { err = "devices_differ"; } else if (m.inode() != km.inode()) { err = "inodes differ"; } if (err.size()) { LOG(error) << "cached mmap:"; t->vm()->dump(); LOG(error) << "/proc/" << t->tid << "/mmaps:"; print_process_mmap(t); ASSERT(t, false) << "\nCached mapping " << m << " should be " << km << "; " << err; } } KernelMapping AddressSpace::fix_stack_segment_start( const MemoryRange& mapping, remote_ptr new_start) { auto it = mem.find(mapping); it->first.update_start(new_start); it->second.map.update_start(new_start); it->second.recorded_map.update_start(new_start); return it->second.map; } KernelMapping AddressSpace::vdso() const { assert(!vdso_start_addr.is_null()); return mapping_of(vdso_start_addr).map; } /** * Iterate over /proc/maps segments for a task and verify that the * task's cached mapping matches the kernel's (given a lenient fuzz * factor). */ void AddressSpace::verify(Task* t) const { ASSERT(t, task_set().end() != task_set().find(t)); MemoryMap::const_iterator mem_it = mem.begin(); KernelMapIterator kernel_it(t); while (!kernel_it.at_end() && mem_it != mem.end()) { KernelMapping km = kernel_it.current(); ++kernel_it; while (!kernel_it.at_end()) { KernelMapping next_km = kernel_it.current(); if (!try_merge_adjacent(&km, next_km)) { break; } ++kernel_it; } KernelMapping vm = mem_it->second.map; ++mem_it; while (mem_it != mem.end() && try_merge_adjacent(&vm, mem_it->second.map)) { ++mem_it; } assert_segments_match(t, vm, km); } ASSERT(t, kernel_it.at_end() && mem_it == mem.end()); } AddressSpace::AddressSpace(Task* t, const string& exe, uint32_t exec_count) : exe(exe), leader_tid_(t->rec_tid), leader_serial(t->tuid().serial()), exec_count(exec_count), is_clone(false), session_(&t->session()), monkeypatch_state(t->session().is_recording() ? new Monkeypatcher() : nullptr), child_mem_fd(-1), first_run_event_(0) { // TODO: this is a workaround of // https://github.com/mozilla/rr/issues/1113 . if (session_->can_validate()) { populate_address_space(t); assert(!vdso_start_addr.is_null()); } else { // Find the location of the VDSO in the just-spawned process. This will // match the VDSO in rr itself since we haven't execed yet. So, speed // things up by searching rr's own VDSO for a syscall instruction. size_t rr_vdso_len; remote_ptr rr_vdso = find_rr_vdso(t, &rr_vdso_len); // Here we rely on the VDSO location in the spawned tracee being the same // as in rr itself. uint8_t* local_vdso = reinterpret_cast(rr_vdso.as_int()); auto offset = find_offset_of_syscall_instruction_in( NativeArch::arch(), local_vdso, rr_vdso_len); offset_to_syscall_in_vdso[NativeArch::arch()] = offset; // Setup traced_syscall_ip_ now because we need to do AutoRemoteSyscalls // (for open_mem_fd) before the first exec. traced_syscall_ip_ = remote_code_ptr(rr_vdso.as_int() + offset); } } AddressSpace::AddressSpace(Session* session, const AddressSpace& o, pid_t leader_tid, uint32_t leader_serial, uint32_t exec_count) : exe(o.exe), leader_tid_(leader_tid), leader_serial(leader_serial), exec_count(exec_count), brk_start(o.brk_start), brk_end(o.brk_end), is_clone(true), mem(o.mem), session_(session), vdso_start_addr(o.vdso_start_addr), monkeypatch_state(o.monkeypatch_state ? 
new Monkeypatcher(*o.monkeypatch_state) : nullptr), traced_syscall_ip_(o.traced_syscall_ip_), privileged_traced_syscall_ip_(o.privileged_traced_syscall_ip_), syscallbuf_lib_start_(o.syscallbuf_lib_start_), syscallbuf_lib_end_(o.syscallbuf_lib_end_), saved_auxv_(o.saved_auxv_), first_run_event_(0) { for (auto& it : o.breakpoints) { breakpoints.insert(make_pair(it.first, it.second)); } for (auto& it : o.watchpoints) { watchpoints.insert(make_pair(it.first, it.second)); } if (session != o.session()) { // Cloning into a new session means we're checkpointing. first_run_event_ = o.first_run_event_; } // cloned tasks will automatically get cloned debug registers and // cloned address-space memory, so we don't need to do any more work here. } static bool try_split_unaligned_range(MemoryRange& range, size_t bytes, vector& result) { if ((range.start().as_int() & (bytes - 1)) || range.size() < bytes) { return false; } result.push_back(MemoryRange(range.start(), bytes)); range = MemoryRange(range.start() + bytes, range.end()); return true; } static vector split_range(const MemoryRange& range) { vector result; MemoryRange r = range; while (r.size() > 0) { if ((sizeof(void*) < 8 || !try_split_unaligned_range(r, 8, result)) && !try_split_unaligned_range(r, 4, result) && !try_split_unaligned_range(r, 2, result)) { bool ret = try_split_unaligned_range(r, 1, result); assert(ret); } } return result; } static void configure_watch_registers(vector& regs, const MemoryRange& range, WatchType type, vector* assigned_regs) { auto split_ranges = split_range(range); if (type == WATCH_WRITE && range.size() > 1) { // We can suppress spurious write-watchpoint triggerings by checking // whether memory values have changed. So we can sometimes conserve // debug registers by upgrading an unaligned range to an aligned range // of a larger size. 
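// Worked example: a 3-byte write watch at 0x1001 naively splits into
// a 1-byte plus a 2-byte range (two debug registers). Widening to the
// aligned 4-byte range [0x1000,0x1004) needs just one register, and
// the extra covered bytes can't cause false positives because the
// value check described above filters out writes that don't change
// the watched bytes.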
uintptr_t align; if (range.size() <= 2) { align = 2; } else if (range.size() <= 4 || sizeof(void*) <= 4) { align = 4; } else { align = 8; } remote_ptr aligned_start(range.start().as_int() & ~(align - 1)); remote_ptr aligned_end((range.end().as_int() + (align - 1)) & ~(align - 1)); auto split = split_range(MemoryRange(aligned_start, aligned_end)); // If the aligned range doesn't reduce register usage, use the original // split to avoid spurious triggerings if (split.size() < split_ranges.size()) { split_ranges = split; } } for (auto& r : split_ranges) { if (assigned_regs) { assigned_regs->push_back(regs.size()); } regs.push_back(WatchConfig(r.start(), r.size(), type)); } } vector AddressSpace::get_watch_configs( WillSetTaskState will_set_task_state) { vector result; for (auto& kv : watchpoints) { vector* assigned_regs = nullptr; if (will_set_task_state == SETTING_TASK_STATE) { kv.second.debug_regs_for_exec_read.clear(); assigned_regs = &kv.second.debug_regs_for_exec_read; } const MemoryRange& r = kv.first; int watching = kv.second.watched_bits(); if (EXEC_BIT & watching) { configure_watch_registers(result, r, WATCH_EXEC, assigned_regs); } if (READ_BIT & watching) { configure_watch_registers(result, r, WATCH_READWRITE, assigned_regs); } else if (WRITE_BIT & watching) { configure_watch_registers(result, r, WATCH_WRITE, nullptr); } } return result; } vector AddressSpace::get_watchpoints_internal( WatchpointFilter filter) { vector result; for (auto& kv : watchpoints) { if (filter == CHANGED_WATCHPOINTS) { if (!kv.second.changed) { continue; } kv.second.changed = false; } const MemoryRange& r = kv.first; int watching = kv.second.watched_bits(); if (EXEC_BIT & watching) { result.push_back(WatchConfig(r.start(), r.size(), WATCH_EXEC)); } if (READ_BIT & watching) { result.push_back(WatchConfig(r.start(), r.size(), WATCH_READWRITE)); } else if (WRITE_BIT & watching) { result.push_back(WatchConfig(r.start(), r.size(), WATCH_WRITE)); } } return result; } vector AddressSpace::consume_watchpoint_changes() { return get_watchpoints_internal(CHANGED_WATCHPOINTS); } vector AddressSpace::all_watchpoints() { return get_watchpoints_internal(ALL_WATCHPOINTS); } bool AddressSpace::allocate_watchpoints() { Task::DebugRegs regs = get_watch_configs(SETTING_TASK_STATE); if (regs.size() <= 0x7f) { bool ok = true; for (auto t : task_set()) { if (!t->set_debug_regs(regs)) { ok = false; } } if (ok) { return true; } } regs.clear(); for (auto t2 : task_set()) { t2->set_debug_regs(regs); } for (auto kv : watchpoints) { kv.second.debug_regs_for_exec_read.clear(); } return false; } void AddressSpace::coalesce_around(MemoryMap::iterator it) { auto first_kv = it; while (mem.begin() != first_kv) { auto next = first_kv; --first_kv; if (!is_adjacent_mapping(first_kv->second.map, next->second.map, RESPECT_HEAP)) { first_kv = next; break; } } auto last_kv = it; while (true) { auto prev = last_kv; ++last_kv; if (mem.end() == last_kv || !is_adjacent_mapping(prev->second.map, last_kv->second.map, RESPECT_HEAP)) { last_kv = prev; break; } } assert(last_kv != mem.end()); if (first_kv == last_kv) { LOG(debug) << " no mappings to coalesce"; return; } Mapping new_m(first_kv->second.map.extend(last_kv->first.end()), first_kv->second.recorded_map.extend(last_kv->first.end())); LOG(debug) << " coalescing " << new_m.map; mem.erase(first_kv, ++last_kv); auto ins = mem.insert(MemoryMap::value_type(new_m.map, new_m)); assert(ins.second); // key didn't already exist } void AddressSpace::destroy_breakpoint(BreakpointMap::const_iterator it) { Task* 
t = *task_set().begin(); t->write_mem(it->first.to_data_ptr(), it->second.overwritten_data); breakpoints.erase(it); } void AddressSpace::for_each_in_range( remote_ptr addr, ssize_t num_bytes, function f, int how) { remote_ptr region_start = floor_page_size(addr); remote_ptr last_unmapped_end = region_start; remote_ptr region_end = ceil_page_size(addr + num_bytes); while (last_unmapped_end < region_end) { // Invariant: |rem| is always exactly the region of // memory remaining to be examined for pages to be // unmapped. MemoryRange rem(last_unmapped_end, region_end); // The next page to iterate may not be contiguous with // the last one seen. auto it = mem.lower_bound(rem); if (mem.end() == it) { LOG(debug) << " not found, done."; return; } // Don't make a reference here. |f| is allowed to erase Mappings. MemoryRange range = it->first; if (rem.end() <= range.start()) { LOG(debug) << " mapping at " << range.start() << " out of range, done."; return; } if (ITERATE_CONTIGUOUS == how && !(range.start() < region_start || rem.start() == range.start())) { LOG(debug) << " discontiguous mapping at " << range.start() << ", done."; return; } f(it->second, rem); // Maintain the loop invariant. last_unmapped_end = range.end(); } } void AddressSpace::map_and_coalesce(const KernelMapping& m, const KernelMapping& recorded_map) { LOG(debug) << " mapping " << m; auto ins = mem.insert(MemoryMap::value_type(m, Mapping(m, recorded_map))); coalesce_around(ins.first); update_watchpoint_values(m.start(), m.end()); } static bool could_be_stack(const KernelMapping& km) { // On 4.1.6-200.fc22.x86_64 we observe that during exec of the exec_stub // during replay, when the process switches from 32-bit to 64-bit, the 64-bit // registers seem truncated to 32 bits during the initial PTRACE_GETREGS so // our sp looks wrong and /proc//maps doesn't identify the region as // stack. // On stub execs there should only be one read-writable memory area anyway. return km.prot() == (PROT_READ | PROT_WRITE) && km.fsname() == "" && km.device() == KernelMapping::NO_DEVICE && km.inode() == KernelMapping::NO_INODE; } static dev_t check_device(Task* t, const KernelMapping& km) { if (km.fsname().c_str()[0] != '/') { return km.device(); } // btrfs files can return the wrong device number in /proc//maps struct stat st; int ret = stat(km.fsname().c_str(), &st); ASSERT(t, ret == 0); return st.st_dev; } void AddressSpace::populate_address_space(Task* t) { bool found_proper_stack = false; for (KernelMapIterator it(t); !it.at_end(); ++it) { auto& km = it.current(); if (km.is_stack()) { found_proper_stack = true; } } int found_stacks = 0; for (KernelMapIterator it(t); !it.at_end(); ++it) { auto& km = it.current(); int flags = km.flags(); remote_ptr start = km.start(); ASSERT(t, flags & MAP_PRIVATE); bool is_stack = found_proper_stack ? km.is_stack() : could_be_stack(km); if (is_stack) { ++found_stacks; flags |= MAP_GROWSDOWN; // MAP_GROWSDOWN segments really occupy one additional page before // the start address shown by /proc//maps --- unless that page // is already occupied by another mapping. 
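// Example: if /proc/<tid>/maps reports the stack segment as starting
// at 0x7ffc0000, the kernel will in fact let it grow into the page
// below on demand, so we model it as starting at 0x7ffbf000, provided
// that page isn't already claimed by another mapping.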
if (!has_mapping(start - page_size())) { start -= page_size(); } } map(start, km.end() - start, km.prot(), flags, km.file_offset_bytes(), km.fsname(), check_device(t, km), km.inode(), nullptr, TraceWriter::EXEC_MAPPING); } ASSERT(t, found_stacks == 1); } rr-4.1.0/src/AddressSpace.h000066400000000000000000000754211265436462100154620ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_ADDRESS_SPACE_H_ #define RR_ADDRESS_SPACE_H_ #include #include #include #include #include #include #include #include #include #include "preload/preload_interface.h" #include "kernel_abi.h" #include "MemoryRange.h" #include "Monkeypatcher.h" #include "remote_code_ptr.h" #include "TaskishUid.h" #include "TraceStream.h" #include "util.h" class Session; class Task; /** * Base class for classes that manage a set of Tasks. */ class HasTaskSet { public: typedef std::set TaskSet; const TaskSet& task_set() const { return tasks; } void insert_task(Task* t); void erase_task(Task* t); bool has_task(Task* t) const { return tasks.find(t) != tasks.end(); } protected: TaskSet tasks; }; /** * Records information that the kernel knows about a mapping. This includes * everything returned through /proc//maps but also information that * we know from observing mmap and mprotect calls. */ class KernelMapping : public MemoryRange { public: /** * These are the flags we track internally to distinguish * between adjacent segments. For example, the kernel * considers a NORESERVE anonynmous mapping that's adjacent to * a non-NORESERVE mapping distinct, even if all other * metadata are the same. See |is_adjacent_mapping()|. */ static const int map_flags_mask = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE | MAP_SHARED | MAP_STACK | MAP_GROWSDOWN; static const int checkable_flags_mask = MAP_PRIVATE | MAP_SHARED; static const dev_t NO_DEVICE = 0; static const ino_t NO_INODE = 0; KernelMapping() : device_(0), inode_(0), prot_(0), flags_(0), offset(0) {} KernelMapping(remote_ptr start, remote_ptr end, const std::string& fsname, dev_t device, ino_t inode, int prot, int flags, off64_t offset = 0) : MemoryRange(start, end), fsname_(fsname), device_(device), inode_(inode), prot_(prot), flags_(flags & map_flags_mask), offset(offset) { assert_valid(); } KernelMapping(const KernelMapping& o) : MemoryRange(o), fsname_(o.fsname_), device_(o.device_), inode_(o.inode_), prot_(o.prot_), flags_(o.flags_), offset(o.offset) { assert_valid(); } KernelMapping operator=(const KernelMapping& o) { this->~KernelMapping(); new (this) KernelMapping(o); return *this; } void assert_valid() const { assert(end() >= start()); assert(size() % page_size() == 0); assert(!(flags_ & ~map_flags_mask)); assert(offset % page_size() == 0); } KernelMapping extend(remote_ptr end) const { assert(end >= MemoryRange::end()); return KernelMapping(start(), end, fsname_, device_, inode_, prot_, flags_, offset); } KernelMapping set_range(remote_ptr start, remote_ptr end) const { return KernelMapping(start, end, fsname_, device_, inode_, prot_, flags_, offset); } KernelMapping subrange(remote_ptr start, remote_ptr end) const { assert(start >= MemoryRange::start() && end <= MemoryRange::end()); return KernelMapping( start, end, fsname_, device_, inode_, prot_, flags_, offset + (is_real_device() ? 
start - MemoryRange::start() : 0)); } KernelMapping set_prot(int prot) const { return KernelMapping(start(), end(), fsname_, device_, inode_, prot, flags_, offset); } /** * Dump a representation of |this| to a string in a format * similar to the former part of /proc/[tid]/maps. */ std::string str() const { char str[200]; sprintf(str, "%8p-%8p %c%c%c%c %08" PRIx64 " %02x:%02x %-10ld ", (void*)start().as_int(), (void*)end().as_int(), (PROT_READ & prot_) ? 'r' : '-', (PROT_WRITE & prot_) ? 'w' : '-', (PROT_EXEC & prot_) ? 'x' : '-', (MAP_SHARED & flags_) ? 's' : 'p', offset, (int)MAJOR(device()), (int)MINOR(device()), (long)inode()); return str + fsname(); } const std::string& fsname() const { return fsname_; } dev_t device() const { return device_; } ino_t inode() const { return inode_; } int prot() const { return prot_; } int flags() const { return flags_; } uint64_t file_offset_bytes() const { return offset; } /** * Return true if this file is/was backed by an external * device, as opposed to a transient RAM mapping. */ bool is_real_device() const { return device() > NO_DEVICE; } bool is_vdso() const { return fsname() == "[vdso]"; } bool is_heap() const { return fsname() == "[heap]"; } bool is_stack() const { return fsname().find("[stack") == 0; } bool is_vvar() const { return fsname() == "[vvar]"; } bool is_vsyscall() const { return fsname() == "[vsyscall]"; } struct stat fake_stat() const { struct stat fake_stat; memset(&fake_stat, 0, sizeof(fake_stat)); fake_stat.st_dev = device(); fake_stat.st_ino = inode(); return fake_stat; } private: // The kernel's name for the mapping, as per /proc//maps. This must // be exactly correct. const std::string fsname_; dev_t device_; ino_t inode_; const int prot_; const int flags_; const uint64_t offset; }; inline std::ostream& operator<<(std::ostream& o, const KernelMapping& m) { o << m.str(); return o; } /** * Compare |a| and |b| so that "subset" lookups will succeed. What * does that mean? If |a| and |b| overlap (intersect), then this * comparator considers them equivalent. That means that if |a| * represents one byte within a mapping |b|, then |a| and |b| will be * considered equivalent. * * If |a| and |b| don't overlap, return true if |a|'s start addres is * less than |b|'s/ */ struct MappingComparator { bool operator()(const MemoryRange& a, const MemoryRange& b) const { return a.intersects(b) ? false : a.start() < b.start(); } }; enum TrapType { TRAP_NONE = 0, // Trap for debugger 'stepi' request. TRAP_STEPI, // Trap for internal rr purposes, f.e. replaying async // signals. TRAP_BKPT_INTERNAL, // Trap on behalf of a debugger user. TRAP_BKPT_USER, }; enum WatchType { // NB: these random-looking enumeration values are chosen to // match the numbers programmed into x86 debug registers. WATCH_EXEC = 0x00, WATCH_WRITE = 0x01, WATCH_READWRITE = 0x03 }; enum DebugStatus { DS_WATCHPOINT_ANY = 0xf, DS_SINGLESTEP = 1 << 14, }; /** * A distinct watchpoint, corresponding to the information needed to * program a single x86 debug register. */ struct WatchConfig { WatchConfig(remote_ptr addr, size_t num_bytes, WatchType type) : addr(addr), num_bytes(num_bytes), type(type) {} remote_ptr addr; size_t num_bytes; WatchType type; }; /** * Models the address space for a set of tasks. This includes the set * of mapped pages, and the resources those mappings refer to. 
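 *
 * Rough shape of the state (declared in the private section below):
 * |mem| maps MemoryRange -> Mapping, i.e. a KernelMapping plus the
 * corresponding recorded-trace KernelMapping, while breakpoints and
 * watchpoints live in side tables keyed by code address and memory
 * range respectively.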
*/ class AddressSpace : public HasTaskSet { friend class Session; friend struct VerifyAddressSpace; public: class Mapping { public: Mapping(const KernelMapping& map, const KernelMapping& recorded_map) : map(map), recorded_map(recorded_map) {} Mapping(const Mapping& other) = default; Mapping() = default; const Mapping& operator=(const Mapping& other) { this->~Mapping(); new (this) Mapping(other); return *this; } const KernelMapping map; // The corresponding KernelMapping in the recording. During recording, // equal to 'map'. const KernelMapping recorded_map; }; typedef std::map MemoryMap; typedef std::shared_ptr shr_ptr; ~AddressSpace(); /** * Call this after a new task has been cloned within this * address space. */ void after_clone(); /** * Call this after a successful execve syscall has completed. At this point * it is safe to perform remote syscalls. */ void post_exec_syscall(Task* t); /** * Change the program data break of this address space to * |addr|. Only called during recording! */ void brk(remote_ptr addr, int prot); /** * This can only be called during recording. */ remote_ptr current_brk() const { assert(!brk_end.is_null()); return brk_end; } /** * Dump a representation of |this| to stderr in a format * similar to /proc/[tid]/maps. * * XXX/ostream-ify me. */ void dump() const; /** * Return true if this was created as the result of an exec() * call, instead of cloned from another address space. */ bool execed() const { return !is_clone; } /** * Return tid of the first task for this address space. */ pid_t leader_tid() const { return leader_tid_; } /** * Return AddressSpaceUid for this address space. */ AddressSpaceUid uid() const { return AddressSpaceUid(leader_tid_, leader_serial, exec_count); } Session* session() const { return session_; } SupportedArch arch() const; /** * Return the path this address space was exec()'d with. */ const std::string& exe_image() const { return exe; } /** * Assuming the last retired instruction has raised a SIGTRAP * and might be a breakpoint trap instruction, return the type * of breakpoint set at |ip() - sizeof(breakpoint_insn)|, if * one exists. Otherwise return TRAP_NONE. */ TrapType get_breakpoint_type_for_retired_insn(remote_code_ptr ip); /** * Return the type of breakpoint that's been registered for * |addr|. */ TrapType get_breakpoint_type_at_addr(remote_code_ptr addr); /** * Returns true when the breakpoint at |addr| is in private * non-writeable memory. When this returns true, the breakpoint can't be * overwritten by the tracee without an intervening mprotect or mmap * syscall. */ bool is_breakpoint_in_private_read_only_memory(remote_code_ptr addr); /** * The buffer |dest| of length |length| represents the contents of tracee * memory at |addr|. Replace the bytes in |dest| that have been overwritten * by breakpoints with the original data that was replaced by the breakpoints. */ void replace_breakpoints_with_original_values(uint8_t* dest, size_t length, remote_ptr addr); /** * Map |num_bytes| into this address space at |addr|, with * |prot| protection and |flags|. The pages are (possibly * initially) backed starting at |offset| of |res|. |fsname|, |device| and * |inode| are values that will appear in the /proc//maps entry. * |*recorded_map| is the mapping during recording, or null if the mapping * during recording is known to be the same as the new map (e.g. because * we are recording!). 
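 *
 * Example call (hypothetical values): mirroring a fresh anonymous
 * 2-page private mmap at |addr| during recording would look like
 *   map(addr, 2 * page_size(), PROT_READ | PROT_WRITE,
 *       MAP_PRIVATE | MAP_ANONYMOUS, 0, "",
 *       KernelMapping::NO_DEVICE, KernelMapping::NO_INODE);
 * with |recorded_map| left defaulted, since during recording the
 * recorded mapping and the new map are the same.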
*/ KernelMapping map( remote_ptr addr, size_t num_bytes, int prot, int flags, off64_t offset_bytes, const std::string& fsname, dev_t device, ino_t inode, const KernelMapping* recorded_map = nullptr, TraceWriter::MappingOrigin origin = TraceWriter::SYSCALL_MAPPING); /** * Return the mapping and mapped resource for the byte at address 'addr'. * There must be such a mapping. */ const Mapping& mapping_of(remote_ptr addr) const; /** * Return true if there is some mapping for the byte at 'addr'. */ bool has_mapping(remote_ptr addr) const; /** * Object that generates robust iterators through the memory map. The * memory map can be updated without invalidating iterators, as long as * Mappings are not added or removed. */ class Maps { public: Maps(const AddressSpace& outer, remote_ptr start) : outer(outer), start(start) {} class iterator { public: iterator(const iterator& it) = default; const iterator& operator++() { ptr = to_it()->second.map.end(); return *this; } bool operator==(const iterator& other) const { return to_it() == other.to_it(); } bool operator!=(const iterator& other) const { return !(*this == other); } const Mapping* operator->() const { return &to_it()->second; } Mapping operator*() const { return to_it()->second; } iterator& operator=(const iterator& other) { this->~iterator(); new (this) iterator(other); return *this; } private: friend class Maps; iterator(const MemoryMap& outer, remote_ptr ptr) : outer(outer), ptr(ptr), at_end(false) {} iterator(const MemoryMap& outer) : outer(outer), at_end(true) {} MemoryMap::const_iterator to_it() const { return at_end ? outer.end() : outer.lower_bound(MemoryRange(ptr, ptr)); } const MemoryMap& outer; remote_ptr ptr; bool at_end; }; iterator begin() const { return iterator(outer.mem, start); } iterator end() const { return iterator(outer.mem); } private: const AddressSpace& outer; remote_ptr start; }; friend class Maps; Maps maps() const { return Maps(*this, remote_ptr()); } Maps maps_starting_at(remote_ptr start) { return Maps(*this, start); } /** * Change the protection bits of [addr, addr + num_bytes) to * |prot|. */ void protect(remote_ptr addr, size_t num_bytes, int prot); /** * Fix up mprotect registers parameters to take account of PROT_GROWSDOWN. */ void fixup_mprotect_growsdown_parameters(Task* t); /** * Move the mapping [old_addr, old_addr + old_num_bytes) to * [new_addr, old_addr + new_num_bytes), preserving metadata. */ void remap(remote_ptr old_addr, size_t old_num_bytes, remote_ptr new_addr, size_t new_num_bytes); /** * Notify that the stack segment 'mapping' has grown down to a new start * address. */ KernelMapping fix_stack_segment_start(const MemoryRange& mapping, remote_ptr new_start); /** * Notify that data was written to this address space by rr or * by the kernel. */ void notify_written(remote_ptr addr, size_t num_bytes); /** Ensure a breakpoint of |type| is set at |addr|. */ bool add_breakpoint(remote_code_ptr addr, TrapType type); /** * Remove a |type| reference to the breakpoint at |addr|. If * the removed reference was the last, the breakpoint is * destroyed. */ void remove_breakpoint(remote_code_ptr addr, TrapType type); /** * Destroy all breakpoints in this VM, regardless of their * reference counts. */ void remove_all_breakpoints(); /** * Manage watchpoints. Analogous to breakpoint-managing * methods above, except that watchpoints can be set for an * address range. 
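 *
 * Sketch of typical use, with |vm| a pointer to this address space
 * and |p| some watched address (both hypothetical):
 *   vm->add_watchpoint(p, 4, WATCH_WRITE);   // watch a 4-byte value
 *   ... resume the tracee ...
 *   vm->remove_watchpoint(p, 4, WATCH_WRITE);
 * add_watchpoint() returns false when the request can't be fit into
 * the available hardware debug registers.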
*/ bool add_watchpoint(remote_ptr addr, size_t num_bytes, WatchType type); void remove_watchpoint(remote_ptr addr, size_t num_bytes, WatchType type); void remove_all_watchpoints(); std::vector all_watchpoints(); /** * Save all watchpoint state onto a stack. */ void save_watchpoints(); /** * Pop all watchpoint state from the saved-state stack. */ bool restore_watchpoints(); /** * Notify that at least one watchpoint was hit --- recheck them all. * Returns true if any watchpoint actually triggered. Note that * debug_status can indicate a hit watchpoint that doesn't actually * trigger, because the value of a write-watchpoint did not change. * Likewise, debug_status can indicate a watchpoint wasn't hit that * actually was (because in some configurations, e.g. VMWare * hypervisor with 32-bit x86 guest, debug_status watchpoint bits * are known to not be set on singlestep). */ bool notify_watchpoint_fired(uintptr_t debug_status); /** * Return all changed watchpoints in |watches| and clear their changed flags. */ std::vector consume_watchpoint_changes(); /** * Make [addr, addr + num_bytes) inaccesible within this * address space. */ void unmap(remote_ptr addr, ssize_t num_bytes); /** * Notification of madvise call. */ void advise(remote_ptr addr, ssize_t num_bytes, int advice); /** Return the vdso mapping of this. */ KernelMapping vdso() const; /** * Verify that this cached address space matches what the * kernel thinks it should be. */ void verify(Task* t) const; bool has_breakpoints() { return !breakpoints.empty(); } bool has_watchpoints() { return !watchpoints.empty(); } // Encoding of the |int $3| instruction. static const uint8_t breakpoint_insn = 0xCC; ScopedFd& mem_fd() { return child_mem_fd; } void set_mem_fd(ScopedFd&& fd) { child_mem_fd = std::move(fd); } Monkeypatcher& monkeypatcher() { assert(monkeypatch_state); return *monkeypatch_state; } /** * Call this only during recording. */ void at_preload_init(Task* t); /* The address of the syscall instruction from which traced syscalls made by * the syscallbuf will originate. */ remote_code_ptr traced_syscall_ip() const { return traced_syscall_ip_; } /* The address of the syscall instruction from which privileged traced * syscalls made by the syscallbuf will originate. */ remote_code_ptr privileged_traced_syscall_ip() const { return privileged_traced_syscall_ip_; } /* Start and end of the mapping of the syscallbuf code * section, used to determine whether a tracee's $ip is in the * lib. */ remote_ptr syscallbuf_lib_start() const { return syscallbuf_lib_start_; } remote_ptr syscallbuf_lib_end() const { return syscallbuf_lib_end_; } bool syscallbuf_enabled() const { return syscallbuf_lib_start_ != nullptr; } /** * We'll map a page of memory here into every exec'ed process for our own * use. */ static remote_ptr rr_page_start() { return RR_PAGE_ADDR; } /** * This might not be the length of an actual system page, but we allocate * at least this much space. */ static uint32_t rr_page_size() { return 4096; } static remote_ptr rr_page_end() { return rr_page_start() + rr_page_size(); } /** * ip() when we're in an untraced system call; same for all supported * architectures (hence static). */ static remote_code_ptr rr_page_ip_in_untraced_syscall() { return RR_PAGE_IN_UNTRACED_SYSCALL_ADDR; } /** * ip() when we're in an untraced replayed system call; same for all supported * architectures (hence static). 
*/ static remote_code_ptr rr_page_ip_in_untraced_replayed_syscall() { return RR_PAGE_IN_UNTRACED_REPLAYED_SYSCALL_ADDR; } /** * This doesn't need to be the same for all architectures, but may as well * make it so. */ static remote_code_ptr rr_page_ip_in_traced_syscall() { return RR_PAGE_IN_TRACED_SYSCALL_ADDR; } /** * ip() when we're in an untraced system call; same for all supported * architectures (hence static). */ static remote_code_ptr rr_page_ip_in_privileged_untraced_syscall() { return RR_PAGE_IN_PRIVILEGED_UNTRACED_SYSCALL_ADDR; } /** * This doesn't need to be the same for all architectures, but may as well * make it so. */ static remote_code_ptr rr_page_ip_in_privileged_traced_syscall() { return RR_PAGE_IN_PRIVILEGED_TRACED_SYSCALL_ADDR; } /** * ip() of the untraced traced system call instruction. */ remote_code_ptr rr_page_untraced_syscall_ip(SupportedArch arch) { return rr_page_ip_in_untraced_syscall().decrement_by_syscall_insn_length( arch); } /** * ip() of the traced traced system call instruction. */ remote_code_ptr rr_page_traced_syscall_ip(SupportedArch arch) { return rr_page_ip_in_traced_syscall().decrement_by_syscall_insn_length( arch); } /** * ip() of the privileged untraced traced system call instruction. */ remote_code_ptr rr_page_privileged_untraced_syscall_ip(SupportedArch arch) { return rr_page_ip_in_privileged_untraced_syscall() .decrement_by_syscall_insn_length(arch); } /** * ip() of the privileged traced traced system call instruction. */ remote_code_ptr rr_page_privileged_traced_syscall_ip(SupportedArch arch) { return rr_page_ip_in_privileged_traced_syscall() .decrement_by_syscall_insn_length(arch); } /** * Locate a syscall instruction in t's VDSO. * This gives us a way to execute remote syscalls without having to write * a syscall instruction into executable tracee memory (which might not be * possible with some kernels, e.g. PaX). */ remote_code_ptr find_syscall_instruction(Task* t); /** * Task |t| just forked from this address space. Apply dont_fork settings. */ void did_fork_into(Task* t); void set_first_run_event(TraceFrame::Time event) { first_run_event_ = event; } TraceFrame::Time first_run_event() { return first_run_event_; } const std::vector& saved_auxv() { return saved_auxv_; } void save_auxv(Task* t); /** * Reads the /proc//maps entry for a specific address. Does no caching. * If performed on a file in a btrfs file system, this may return the * wrong device number! If you stick to anonymous or special file * mappings, this should be OK. */ KernelMapping read_kernel_mapping(Task* t, remote_ptr addr); private: class Breakpoint; typedef std::map BreakpointMap; class Watchpoint; AddressSpace(Task* t, const std::string& exe, uint32_t exec_count); AddressSpace(Session* session, const AddressSpace& o, pid_t leader_tid, uint32_t leader_serial, uint32_t exec_count); /** * After an exec, populate the new address space of |t| with * the existing mappings we find in /proc/maps. */ void populate_address_space(Task* t); void unmap_internal(remote_ptr addr, ssize_t num_bytes); // Also sets brk_ptr. 
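// (Concretely: during recording, map_rr_page() issues a remote brk(0)
// syscall and seeds both brk_start and brk_end with the result; see
// the definition in AddressSpace.cc.)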
void map_rr_page(Task* t); bool update_watchpoint_value(const MemoryRange& range, Watchpoint& watchpoint); void update_watchpoint_values(remote_ptr start, remote_ptr end); enum WatchpointFilter { ALL_WATCHPOINTS, CHANGED_WATCHPOINTS }; std::vector get_watchpoints_internal(WatchpointFilter filter); enum WillSetTaskState { SETTING_TASK_STATE, NOT_SETTING_TASK_STATE }; std::vector get_watch_configs( WillSetTaskState will_set_task_state); /** * Construct a minimal set of watchpoints to be enabled based * on |set_watchpoint()| calls, and program them for each task * in this address space. */ bool allocate_watchpoints(); /** * Merge the mappings adjacent to |it| in memory that are * semantically "adjacent mappings" of the same resource as * well, for example have adjacent file offsets and the same * prot and flags. */ void coalesce_around(MemoryMap::iterator it); /** * Erase |it| from |breakpoints| and restore any memory in * this it may have overwritten. */ void destroy_breakpoint(BreakpointMap::const_iterator it); /** * For each mapped segment overlapping [addr, addr + * num_bytes), call |f|. Pass |f| the overlapping mapping, * the mapped resource, and the range of addresses remaining * to be iterated over. * * Pass |ITERATE_CONTIGUOUS| to stop iterating when the last * contiguous mapping after |addr| within the region is seen. * Default is to iterate all mappings in the region. */ enum { ITERATE_DEFAULT, ITERATE_CONTIGUOUS }; void for_each_in_range( remote_ptr addr, ssize_t num_bytes, std::function f, int how = ITERATE_DEFAULT); /** * Map |m| of |r| into this address space, and coalesce any * mappings of |r| that are adjacent to |m|. */ void map_and_coalesce(const KernelMapping& m, const KernelMapping& recorded_map); /** * Call this only during recording. */ template void at_preload_init_arch(Task* t); enum { EXEC_BIT = 1 << 0, READ_BIT = 1 << 1, WRITE_BIT = 1 << 2 }; /** Return the access bits above needed to watch |type|. */ static int access_bits_of(WatchType type); /** * Represents a refcount set on a particular address. Because there * can be multiple refcounts of multiple types set on a single * address, Breakpoint stores explicit USER and INTERNAL breakpoint * refcounts. Clients adding/removing breakpoints at this addr must * call ref()/unref() as appropropiate. */ struct Breakpoint { Breakpoint() : internal_count(0), user_count(0) {} Breakpoint(const Breakpoint& o) = default; // AddressSpace::destroy_all_breakpoints() can cause this // destructor to be invoked while we have nonzero total // refcount, so the most we can assert is that the refcounts // are valid. ~Breakpoint() { assert(internal_count >= 0 && user_count >= 0); } void ref(TrapType which) { assert(internal_count >= 0 && user_count >= 0); ++*counter(which); } int unref(TrapType which) { assert(internal_count > 0 || user_count > 0); --*counter(which); assert(internal_count >= 0 && user_count >= 0); return internal_count + user_count; } TrapType type() const { // NB: USER breakpoints need to be processed before // INTERNAL ones. We want to give the debugger a // chance to dispatch commands before we attend to the // internal rr business. So if there's a USER "ref" // on the breakpoint, treat it as a USER breakpoint. return user_count > 0 ? TRAP_BKPT_USER : TRAP_BKPT_INTERNAL; } size_t data_length() { return 1; } uint8_t* original_data() { return &overwritten_data; } // "Refcounts" of breakpoints set at |addr|. 
The breakpoint // object must be unique since we have to save the overwritten // data, and we can't enforce the order in which breakpoints // are set/removed. int internal_count, user_count; uint8_t overwritten_data; static_assert(sizeof(overwritten_data) == sizeof(AddressSpace::breakpoint_insn), "Must have the same size."); int* counter(TrapType which) { assert(TRAP_BKPT_INTERNAL == which || TRAP_BKPT_USER == which); int* p = TRAP_BKPT_USER == which ? &user_count : &internal_count; assert(*p >= 0); return p; } }; // XXX one is tempted to merge Breakpoint and Watchpoint into a single // entity, but the semantics are just different enough that separate // objects are easier for now. /** * Track the watched accesses of a contiguous range of memory * addresses. */ class Watchpoint { public: Watchpoint(size_t num_bytes) : exec_count(0), read_count(0), write_count(0), value_bytes(num_bytes), valid(false), changed(false) {} Watchpoint(const Watchpoint&) = default; ~Watchpoint() { assert_valid(); } void watch(int which) { assert_valid(); exec_count += (EXEC_BIT & which) != 0; read_count += (READ_BIT & which) != 0; write_count += (WRITE_BIT & which) != 0; } int unwatch(int which) { assert_valid(); if (EXEC_BIT & which) { assert(exec_count > 0); --exec_count; } if (READ_BIT & which) { assert(read_count > 0); --read_count; } if (WRITE_BIT & which) { assert(write_count > 0); --write_count; } return exec_count + read_count + write_count; } int watched_bits() const { return (exec_count > 0 ? EXEC_BIT : 0) | (read_count > 0 ? READ_BIT : 0) | (write_count > 0 ? WRITE_BIT : 0); } void assert_valid() const { assert(exec_count >= 0 && read_count >= 0 && write_count >= 0); } // Watchpoints stay alive until all watched access types have // been cleared. We track refcounts of each watchable access // separately. int exec_count, read_count, write_count; // Debug registers allocated for read/exec access checking. // Write watchpoints are always triggered by checking for actual memory // value changes. Read/exec watchpoints can't be triggered that way, so // we look for these registers being triggered instead. std::vector<int8_t> debug_regs_for_exec_read; std::vector<uint8_t> value_bytes; bool valid; bool changed; }; // All breakpoints set in this VM. BreakpointMap breakpoints; /* Path of the real executable image this address space was * exec()'d with. */ std::string exe; /* Pid of first task for this address space */ pid_t leader_tid_; /* Serial number of first task for this address space */ uint32_t leader_serial; uint32_t exec_count; // Only valid during recording remote_ptr<void> brk_start; /* Current brk. Not necessarily page-aligned. */ remote_ptr<void> brk_end; /* Were we cloned from another address space? */ bool is_clone; /* All segments mapped into this address space. */ MemoryMap mem; /* madvise DONTFORK regions */ std::set<MemoryRange> dont_fork; // The session that created this. We save a ref to it so that // we can notify it when we die. Session* session_; /* First mapped byte of the vdso. */ remote_ptr<void> vdso_start_addr; // The monkeypatcher that's handling this address space. std::unique_ptr<Monkeypatcher> monkeypatch_state; // The watchpoints set for tasks in this VM. Watchpoints are // programmed per Task, but we track them per address space on // behalf of debuggers that assume that model. std::map<MemoryRange, Watchpoint> watchpoints; std::vector<std::map<MemoryRange, Watchpoint> > saved_watchpoints; // Tracee memory is read and written through this fd, which is // opened for the tracee's magic /proc/[tid]/mem device.
The // advantage of this over ptrace is that we can access it even // when the tracee isn't at a ptrace-stop. It's also // theoretically faster for large data transfers, which rr can // do often. // // Users of child_mem_fd should fall back to ptrace-based memory // access when child_mem_fd is not open. ScopedFd child_mem_fd; remote_code_ptr traced_syscall_ip_; remote_code_ptr privileged_traced_syscall_ip_; remote_ptr<void> syscallbuf_lib_start_; remote_ptr<void> syscallbuf_lib_end_; std::vector<uint8_t> saved_auxv_; /** * The time of the first event that ran code for a task in this address space. * 0 if no such event has occurred. */ TraceFrame::Time first_run_event_; /** * For each architecture, the offset of a syscall instruction within that * architecture's VDSO, or 0 if not known. */ static uint32_t offset_to_syscall_in_vdso[SupportedArch_MAX + 1]; /** * Ensure that the cached mapping of |t| matches /proc/maps, * using adjacent-map-merging heuristics that are as lenient * as possible given the data available from /proc/maps. */ static void check_segment_iterator(void* vasp, Task* t, const struct map_iterator_data* data); AddressSpace operator=(const AddressSpace&) = delete; }; #endif /* RR_ADDRESS_SPACE_H_ */ rr-4.1.0/src/AutoRemoteSyscalls.cc000066400000000000000000000360631265436462100170600ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "RemoteSyscalls" #include "AutoRemoteSyscalls.h" #include #include "rr/rr.h" #include "kernel_metadata.h" #include "log.h" #include "ReplaySession.h" #include "Session.h" #include "task.h" #include "util.h" using namespace rr; using namespace std; /** * The ABI of the socketcall syscall is a nightmare; the first arg to * the kernel is the sub-operation, and the second argument is a * pointer to the args. The args depend on the sub-op. */ template <typename Arch> struct socketcall_args { typename Arch::signed_long args[3]; } __attribute__((packed)); void AutoRestoreMem::init(const uint8_t* mem, ssize_t num_bytes) { ASSERT(remote.task(), !remote.regs().sp().is_null()) << "Memory parameters were disabled"; len = num_bytes; saved_sp = remote.regs().sp(); remote.regs().set_sp(remote.regs().sp() - len); remote.task()->set_regs(remote.regs()); addr = remote.regs().sp(); data.resize(len); remote.task()->read_bytes_helper(addr, len, data.data()); if (mem) { remote.task()->write_bytes_helper(addr, len, mem); } } AutoRestoreMem::~AutoRestoreMem() { assert(saved_sp == remote.regs().sp() + len); remote.task()->write_bytes_helper(addr, len, data.data()); remote.regs().set_sp(remote.regs().sp() + len); remote.task()->set_regs(remote.regs()); } AutoRemoteSyscalls::AutoRemoteSyscalls(Task* t, MemParamsEnabled enable_mem_params) : t(t), initial_regs(t->regs()), initial_ip(t->ip()), initial_sp(t->regs().sp()), pending_syscallno(-1) { // We could use privileged_traced_syscall_ip() here, but we don't actually // need privileges because tracee seccomp filters are modified to only // produce PTRACE_SECCOMP_EVENTs that we ignore. And before the rr page is // loaded, the privileged_traced_syscall_ip is not available.
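// For illustration only (this sketch is not part of the original source):
// once constructed, a caller can issue remote syscalls directly, e.g.
//
//   AutoRemoteSyscalls remote(t);
//   remote_ptr<void> child_map = remote.infallible_mmap_syscall(
//       remote_ptr<void>(), page_size(), PROT_READ | PROT_WRITE,
//       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
//
// The tracee's registers (including the ip/sp saved above) are restored
// when |remote| goes out of scope.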
initial_regs.set_ip(t->vm()->traced_syscall_ip()); if (enable_mem_params == ENABLE_MEMORY_PARAMS) { maybe_fix_stack_pointer(); } else { initial_regs.set_sp(remote_ptr()); } } static bool is_usable_area(const KernelMapping& km) { return (km.prot() & (PROT_READ | PROT_WRITE)) == (PROT_READ | PROT_WRITE) && (km.flags() & MAP_PRIVATE); } void AutoRemoteSyscalls::maybe_fix_stack_pointer() { if (!t->session().can_validate()) { return; } remote_ptr last_stack_byte = t->regs().sp() - 1; if (t->vm()->has_mapping(last_stack_byte)) { auto m = t->vm()->mapping_of(last_stack_byte); if (is_usable_area(m.map) && m.map.start() + 2048 <= t->regs().sp()) { // 'sp' is in a stack region and there's plenty of space there. No need // to fix anything. return; } } MemoryRange found_stack; for (auto m : t->vm()->maps()) { if (is_usable_area(m.map)) { found_stack = m.map; break; } }; ASSERT(t, !found_stack.start().is_null()) << "No stack area found"; initial_regs.set_sp(found_stack.end()); } AutoRemoteSyscalls::~AutoRemoteSyscalls() { restore_state_to(t); } void AutoRemoteSyscalls::restore_state_to(Task* t) { initial_regs.set_ip(initial_ip); initial_regs.set_sp(initial_sp); // Restore stomped registers. t->set_regs(initial_regs); } void AutoRemoteSyscalls::syscall_helper(SyscallWaiting wait, int syscallno, Registers& callregs) { callregs.set_syscallno(syscallno); t->set_regs(callregs); t->advance_syscall(); ASSERT(t, t->regs().ip() - callregs.ip() == syscall_instruction_length(t->arch())) << "Should have advanced ip by one syscall_insn"; ASSERT(t, t->regs().original_syscallno() == syscallno) << "Should be entering " << t->syscall_name(syscallno) << ", but instead at " << t->syscall_name(t->regs().original_syscallno()); // Start running the syscall. pending_syscallno = syscallno; t->resume_execution(RESUME_SYSCALL, RESUME_NONBLOCKING, RESUME_NO_TICKS); if (WAIT == wait) { wait_syscall(syscallno); } } void AutoRemoteSyscalls::wait_syscall(int syscallno) { ASSERT(t, pending_syscallno == syscallno || syscallno < 0); // Wait for syscall-exit trap. 
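// (Sketch of the DONT_WAIT pairing this supports, mirroring
// retrieve_fd_arch() below; not in the original source:
//   syscall_helper(AutoRemoteSyscalls::DONT_WAIT, no, callregs);
//   /* ... do the local work that unblocks the tracee ... */
//   wait_syscall(no);
// )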
t->wait(); pending_syscallno = -1; ASSERT(t, t->regs().original_syscallno() == syscallno || syscallno < 0) << "Should be entering " << t->syscall_name(syscallno) << ", but instead at " << t->syscall_name(t->regs().original_syscallno()); } SupportedArch AutoRemoteSyscalls::arch() const { return t->arch(); } template static void write_socketcall_args(Task* t, remote_ptr remote_mem, typename Arch::signed_long arg1, typename Arch::signed_long arg2, typename Arch::signed_long arg3) { socketcall_args sc_args = { { arg1, arg2, arg3 } }; t->write_mem(remote_mem.cast >(), sc_args); } static size_t align_size(size_t size) { static int align_amount = sizeof(uintptr_t); return (size + align_amount - 1) & ~(align_amount - 1); } static remote_ptr allocate(remote_ptr* buf_end, const AutoRestoreMem& remote_buf, size_t size) { remote_ptr r = *buf_end; *buf_end += align_size(size); assert(size_t(*buf_end - remote_buf.get()) <= remote_buf.size()); return r; } template static remote_ptr allocate(remote_ptr* buf_end, const AutoRestoreMem& remote_buf) { return allocate(buf_end, remote_buf, sizeof(T)).cast(); } static int create_bind_and_listen_socket(const char* path) { struct sockaddr_un addr; int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0); if (listen_sock < 0) { FATAL() << "Failed to create listen socket"; } addr.sun_family = AF_UNIX; strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1); addr.sun_path[sizeof(addr.sun_path) - 1] = 0; if (::bind(listen_sock, (struct sockaddr*)&addr, sizeof(addr))) { FATAL() << "Failed to bind listen socket"; } if (listen(listen_sock, 1)) { FATAL() << "Failed to mark listening for listen socket"; } return listen_sock; } template static int child_create_socket(AutoRemoteSyscalls& remote, remote_ptr > sc_args) { int child_sock; if (sc_args.is_null()) { child_sock = remote.infallible_syscall(Arch::socket, AF_UNIX, SOCK_STREAM, 0); } else { write_socketcall_args(remote.task(), sc_args, AF_UNIX, SOCK_STREAM, 0); child_sock = remote.infallible_syscall(Arch::socketcall, SYS_SOCKET, sc_args); } return child_sock; } template static void child_connect_socket(AutoRemoteSyscalls& remote, AutoRestoreMem& remote_buf, remote_ptr > sc_args, remote_ptr buf_end, int child_sock, const char* path, int* cwd_fd) { typename Arch::sockaddr_un addr; addr.sun_family = AF_UNIX; assert(strlen(path) < sizeof(addr.sun_path)); // Skip leading '/' since we're going to access this relative to the root assert(path[0] == '/'); strcpy(addr.sun_path, path + 1); auto tmp_buf_end = buf_end; auto remote_dot = allocate(&tmp_buf_end, remote_buf, 2); remote.task()->write_mem(remote_dot.cast(), ".", 2); *cwd_fd = remote.infallible_syscall(syscall_number_for_open(Arch::arch()), remote_dot, O_PATH | O_DIRECTORY); remote.infallible_syscall(Arch::fchdir, RR_RESERVED_ROOT_DIR_FD); auto remote_addr = allocate(&buf_end, remote_buf); remote.task()->write_mem(remote_addr, addr); Registers callregs = remote.regs(); int remote_syscall; if (sc_args.is_null()) { callregs.set_arg1(child_sock); callregs.set_arg2(remote_addr); callregs.set_arg3(sizeof(addr)); remote_syscall = Arch::connect; } else { write_socketcall_args(remote.task(), sc_args, child_sock, remote_addr.as_int(), sizeof(addr)); callregs.set_arg1(SYS_CONNECT); callregs.set_arg2(sc_args); remote_syscall = Arch::socketcall; } remote.syscall_helper(AutoRemoteSyscalls::DONT_WAIT, remote_syscall, callregs); } template static void child_sendmsg(AutoRemoteSyscalls& remote, AutoRestoreMem& remote_buf, remote_ptr > sc_args, remote_ptr buf_end, int child_sock, int fd) { char 
cmsgbuf[Arch::cmsg_space(sizeof(fd))]; // Pull the puppet strings to have the child send its fd // to us. Similarly to above, we DONT_WAIT on the // call to finish, since it's likely not defined whether the // sendmsg() may block on our recvmsg()ing what the tracee // sent us (in which case we would deadlock with the tracee). // We call sendmsg on child socket, but first we have to prepare a lot of // data. auto remote_msg = allocate(&buf_end, remote_buf); auto remote_msgdata = allocate(&buf_end, remote_buf); auto remote_cmsgbuf = allocate(&buf_end, remote_buf, sizeof(cmsgbuf)); // Unfortunately we need to send at least one byte of data in our // message for it to work typename Arch::iovec msgdata; msgdata.iov_base = remote_msg; // doesn't matter much, we ignore the data msgdata.iov_len = 1; remote.task()->write_mem(remote_msgdata, msgdata); typename Arch::msghdr msg; memset(&msg, 0, sizeof(msg)); msg.msg_control = remote_cmsgbuf; msg.msg_controllen = sizeof(cmsgbuf); msg.msg_iov = remote_msgdata; msg.msg_iovlen = 1; remote.task()->write_mem(remote_msg, msg); auto cmsg = reinterpret_cast(cmsgbuf); cmsg->cmsg_len = Arch::cmsg_len(sizeof(fd)); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; *static_cast(Arch::cmsg_data(cmsg)) = fd; remote.task()->write_bytes_helper(remote_cmsgbuf, sizeof(cmsgbuf), &cmsgbuf); Registers callregs = remote.regs(); int remote_syscall; if (sc_args.is_null()) { callregs.set_arg1(child_sock); callregs.set_arg2(remote_msg); callregs.set_arg3(0); remote_syscall = Arch::sendmsg; } else { write_socketcall_args(remote.task(), sc_args, child_sock, remote_msg.as_int(), 0); callregs.set_arg1(SYS_SENDMSG); callregs.set_arg2(sc_args); remote_syscall = Arch::socketcall; } remote.syscall_helper(AutoRemoteSyscalls::DONT_WAIT, remote_syscall, callregs); } static int recvmsg_socket(int sock) { char cmsgbuf[CMSG_SPACE(sizeof(int))]; char received_data; struct iovec msgdata; msgdata.iov_base = &received_data; msgdata.iov_len = 1; struct msghdr msg; memset(&msg, 0, sizeof(msg)); msg.msg_control = cmsgbuf; msg.msg_controllen = sizeof(cmsgbuf); msg.msg_iov = &msgdata; msg.msg_iovlen = 1; if (0 > recvmsg(sock, &msg, 0)) { FATAL() << "Failed to receive fd"; } struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); assert(cmsg && cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS); int our_fd = *(int*)CMSG_DATA(cmsg); assert(our_fd >= 0); return our_fd; } template static size_t reserve() { return align_size(sizeof(T)); } template ScopedFd AutoRemoteSyscalls::retrieve_fd_arch(int fd) { size_t data_length = std::max(reserve(), reserve() + align_size(Arch::cmsg_space(sizeof(fd))) + reserve()); if (has_socketcall_syscall(Arch::arch())) { data_length += reserve >(); } AutoRestoreMem remote_buf(*this, nullptr, data_length); remote_ptr sc_args_end = remote_buf.get(); remote_ptr > sc_args; if (has_socketcall_syscall(Arch::arch())) { sc_args = allocate >(&sc_args_end, remote_buf); } char path[PATH_MAX]; sprintf(path, "/tmp/rr-tracee-fd-transfer-%d-%ld", t->tid, random()); int listen_sock = create_bind_and_listen_socket(path); int child_sock = child_create_socket(*this, sc_args); int cwd_fd; child_connect_socket(*this, remote_buf, sc_args, sc_args_end, child_sock, path, &cwd_fd); // Now the child is waiting for us to accept it. 
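// (Recap of the rendezvous ordering, added for clarity: tracee connect()
// started DONT_WAIT -> rr accept() -> finish connect() -> tracee
// sendmsg(SCM_RIGHTS) started DONT_WAIT -> rr recvmsg() -> finish
// sendmsg(). Each tracee syscall is completed only after the matching
// local call, so neither side can block the other indefinitely.)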
int sock = accept(listen_sock, nullptr, nullptr); if (sock < 0) { FATAL() << "Failed to create parent socket"; } // Complete child's connect() syscall wait_syscall(); int child_syscall_result = t->regs().syscall_result_signed(); if (child_syscall_result) { FATAL() << "Failed to connect() in tracee; err=" << errno_name(-child_syscall_result); } infallible_syscall(Arch::fchdir, cwd_fd); infallible_syscall(Arch::close, cwd_fd); // Listening socket not needed anymore close(listen_sock); unlink(path); child_sendmsg(*this, remote_buf, sc_args, sc_args_end, child_sock, fd); wait_syscall(); child_syscall_result = t->regs().syscall_result_signed(); if (0 >= child_syscall_result) { FATAL() << "Failed to sendmsg() in tracee; err=" << errno_name(-child_syscall_result); } // Child may be waiting on our recvmsg(). int our_fd = recvmsg_socket(sock); child_syscall_result = infallible_syscall(Arch::close, child_sock); if (0 > close(sock)) { FATAL() << "Failed to close parent socket"; } return ScopedFd(our_fd); } ScopedFd AutoRemoteSyscalls::retrieve_fd(int fd) { RR_ARCH_FUNCTION(retrieve_fd_arch, arch(), fd); } remote_ptr AutoRemoteSyscalls::infallible_mmap_syscall( remote_ptr addr, size_t length, int prot, int flags, int child_fd, uint64_t offset_pages) { // The first syscall argument is called "arg 1", so // our syscall-arg-index template parameter starts // with "1". remote_ptr ret = has_mmap2_syscall(arch()) ? infallible_syscall_ptr(syscall_number_for_mmap2(arch()), addr, length, prot, flags, child_fd, (off_t)offset_pages) : infallible_syscall_ptr(syscall_number_for_mmap(arch()), addr, length, prot, flags, child_fd, offset_pages * page_size()); if (flags & MAP_FIXED) { ASSERT(t, addr == ret) << "MAP_FIXED at " << addr << " but got " << ret; } return ret; } void AutoRemoteSyscalls::check_syscall_result(int syscallno) { long ret = t->regs().syscall_result_signed(); if (-4096 < ret && ret < 0) { string extra_msg; if (is_open_syscall(syscallno, arch())) { extra_msg = " opening " + t->read_c_str(t->regs().arg1()); } else if (is_openat_syscall(syscallno, arch())) { extra_msg = " opening " + t->read_c_str(t->regs().arg2()); } ASSERT(t, false) << "Syscall " << syscall_name(syscallno, arch()) << " failed with errno " << errno_name(-ret) << extra_msg; } } rr-4.1.0/src/AutoRemoteSyscalls.h000066400000000000000000000171461265436462100167230ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_AUTO_REMOTE_SYSCALLS_H_ #define RR_AUTO_REMOTE_SYSCALLS_H_ #include #include #include "Registers.h" #include "ScopedFd.h" #include "task.h" class AutoRemoteSyscalls; class Task; /** * Helpers to make remote syscalls on behalf of a Task. Usage looks * like * * AutoRemoteSyscalls remote(t); // prepare remote syscalls * remote.syscall(syscall_number_for_open(remote.arch()), ...); // make *syscalls * ... * // when |remote| goes out of scope, remote syscalls are finished */ /** * Cookie used to restore stomped memory, usually prepared as the * argument to a remote syscall. */ class AutoRestoreMem { public: /** * Write |mem| into address space of the Task prepared for * remote syscalls in |remote|, in such a way that the write * will be undone. The address of the reserved mem space is * available via |get|. * If |mem| is null, data is not written, only the space is reserved. 
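*
* A minimal usage sketch (illustrative only; assumes |t| is a valid Task):
*
*   AutoRemoteSyscalls remote(t);
*   AutoRestoreMem path(remote, "/etc/passwd");
*   long child_fd = remote.syscall(syscall_number_for_open(remote.arch()),
*                                  path.get(), O_RDONLY);
*
* The scratch stack bytes are restored when |path| goes out of scope.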
*/ AutoRestoreMem(AutoRemoteSyscalls& remote, const uint8_t* mem, ssize_t num_bytes) : remote(remote) { init(mem, num_bytes); } /** * Convenience constructor for pushing a C string |str|, including * the trailing '\0' byte. */ AutoRestoreMem(AutoRemoteSyscalls& remote, const char* str) : remote(remote) { init((const uint8_t*)str, strlen(str) + 1 /*null byte*/); } ~AutoRestoreMem(); /** * Get a pointer to the reserved memory. */ remote_ptr<void> get() const { return addr; } /** * Return size of reserved memory buffer. */ size_t size() const { return data.size(); } private: void init(const uint8_t* mem, ssize_t num_bytes); AutoRemoteSyscalls& remote; /* Address of tmp mem. */ remote_ptr<void> addr; /* Saved data. */ std::vector<uint8_t> data; /* (We keep this around for error checking.) */ remote_ptr<void> saved_sp; /* Length of tmp mem. */ size_t len; AutoRestoreMem& operator=(const AutoRestoreMem&) = delete; AutoRestoreMem(const AutoRestoreMem&) = delete; void* operator new(size_t) = delete; void operator delete(void*) = delete; }; /** * RAII helper to prepare a Task for remote syscalls and undo any * preparation upon going out of scope. */ class AutoRemoteSyscalls { public: enum MemParamsEnabled { ENABLE_MEMORY_PARAMS, DISABLE_MEMORY_PARAMS }; /** * Prepare |t| for a series of remote syscalls. * * NBBB! Before preparing for a series of remote syscalls, * the caller *must* ensure the callee will not receive any * signals. This code does not attempt to deal with signals. */ AutoRemoteSyscalls(Task* t, MemParamsEnabled enable_mem_params = ENABLE_MEMORY_PARAMS); /** * Undo in |t| any preparations that were made for a series of * remote syscalls. */ ~AutoRemoteSyscalls(); /** * If t's stack pointer doesn't look valid, temporarily adjust it to * the top of *some* stack area. */ void maybe_fix_stack_pointer(); /** * "Initial" registers saved from the target task. * * NB: a non-const reference is returned because some power * users want to update the registers that are restored after * finishing remote syscalls. Perhaps these users should be * fixed, or you should just be careful. */ Registers& regs() { return initial_regs; } /** * Undo any preparations to make remote syscalls in the context of |t|. * * This is usually called automatically by the destructor; * don't call it directly unless you really know what you're * doing. *ESPECIALLY* don't call this on a |t| other than * the one passed to the constructor, unless you really know * what you're doing. */ void restore_state_to(Task* t); /** * Make |syscallno| with variadic |args| (limited to 6 on * x86). Return the raw kernel return value. */ template <typename... Rest> long syscall(int syscallno, Rest... args) { Registers callregs = regs(); // The first syscall argument is called "arg 1", so // our syscall-arg-index template parameter starts // with "1". syscall_helper<1>(syscallno, callregs, args...); return t->regs().syscall_result_signed(); } template <typename... Rest> long infallible_syscall(int syscallno, Rest... args) { Registers callregs = regs(); // The first syscall argument is called "arg 1", so // our syscall-arg-index template parameter starts // with "1". syscall_helper<1>(syscallno, callregs, args...); check_syscall_result(syscallno); return t->regs().syscall_result_signed(); } template <typename... Rest> remote_ptr<void> infallible_syscall_ptr(int syscallno, Rest...
args) { Registers callregs = regs(); syscall_helper<1>(syscallno, callregs, args...); check_syscall_result(syscallno); return t->regs().syscall_result(); } /** * Remote mmap syscalls are common and non-trivial due to the need to * select either mmap2 or mmap. */ remote_ptr infallible_mmap_syscall(remote_ptr addr, size_t length, int prot, int flags, int child_fd, uint64_t offset_pages); /** The Task in the context of which we're making syscalls. */ Task* task() const { return t; } /** * A small helper to get at the Task's arch. * Out-of-line to avoid including task.h here. */ SupportedArch arch() const; /** * Arranges for 'fd' to be transmitted to this process and returns * our opened version of it. */ ScopedFd retrieve_fd(int fd); /** * Remotely invoke in |t| the specified syscall with the given * arguments. The arguments must of course be valid in |t|, * and no checking of that is done by this function. * * If |wait| is |WAIT|, the syscall is finished in |t| and the * result is returned. Otherwise if it's |DONT_WAIT|, the * syscall is initiated but *not* finished in |t|, and the * return value is undefined. Call |wait_remote_syscall()| to * finish the syscall and get the return value. */ enum SyscallWaiting { WAIT = 1, DONT_WAIT = 0 }; void syscall_helper(SyscallWaiting wait, int syscallno, Registers& callregs); private: /** * Wait for the |DONT_WAIT| syscall |syscallno| initiated by * |remote_syscall()| to finish, returning the result. * |syscallno| is only for assertion checking. If no value is passed in, * everything should work without the assertion checking. */ void wait_syscall(int syscallno = -1); void check_syscall_result(int syscallno); /** * "Recursively" build the set of syscall registers in * |callregs|. |Index| is the syscall arg that will be set to * |arg|, and |args| are the remaining arguments. */ template void syscall_helper(int syscallno, Registers& callregs, T arg, Rest... args) { callregs.set_arg(arg); syscall_helper(syscallno, callregs, args...); } /** * "Recursion" "base case": no more arguments to build, so * just make the syscall and return the kernel return value. 
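*
* For example (illustrative, not in the original source),
* remote.syscall(no, a, b) peels off one argument per instantiation --
* set_arg<1>(a), then set_arg<2>(b) -- until this base case finally
* issues the syscall via syscall_helper(WAIT, syscallno, callregs).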
*/ template void syscall_helper(int syscallno, Registers& callregs) { syscall_helper(WAIT, syscallno, callregs); } template ScopedFd retrieve_fd_arch(int fd); Task* t; Registers initial_regs; remote_code_ptr initial_ip; remote_ptr initial_sp; int pending_syscallno; AutoRemoteSyscalls& operator=(const AutoRemoteSyscalls&) = delete; AutoRemoteSyscalls(const AutoRemoteSyscalls&) = delete; void* operator new(size_t) = delete; void operator delete(void*) = delete; }; #endif // RR_AUTO_REMOTE_SYSCALLS_H_ rr-4.1.0/src/BreakpointCondition.h000066400000000000000000000005031265436462100170530ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_BREAKPOINT_CONDITION_H_ #define RR_BREAKPOINT_CONDITION_H_ class Task; class BreakpointCondition { public: virtual ~BreakpointCondition() {} virtual bool evaluate(Task* t) const = 0; }; #endif // RR_BREAKPOINT_CONDITION_H_ rr-4.1.0/src/CPUIDBugDetector.cc000066400000000000000000000042631265436462100162470ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "CPUIDBugDetector.h" #include "Event.h" #include "Flags.h" #include "kernel_abi.h" #include "Session.h" #include "task.h" using namespace rr; extern "C" int cpuid_loop(int iterations); void CPUIDBugDetector::run_detection_code() { // Call cpuid_loop to generate trace data we can use to detect // the cpuid rcb undercount bug. This generates 4 geteuid // calls which should have 2 rcbs between each of the // 3 consecutive pairs. cpuid_loop(4); } static bool rcb_counts_ok(uint64_t prev, uint64_t current, const char* source) { if (current - prev == 2) { return true; } if (!Flags::get().suppress_environment_warnings) { fprintf( stderr, "\n" "rr: Warning: You appear to be running in a VMWare guest with a bug\n" " where a conditional branch instruction between two CPUID " "instructions\n" " sometimes fails to be counted by the conditional branch " "performance\n" " counter. Work around this problem by adding\n" " monitor_control.disable_hvsim_clusters = true\n" " to your .vmx file.\n" "\n"); } return false; } void CPUIDBugDetector::notify_reached_syscall_during_replay(Task* t) { // We only care about events that happen before the first exec, // when our detection code runs. if (t->session().can_validate()) { return; } const Event& ev = t->current_trace_frame().event(); if (!is_geteuid32_syscall(ev.Syscall().number, t->arch()) && !is_geteuid_syscall(ev.Syscall().number, t->arch())) { return; } uint64_t trace_rcb_count = t->current_trace_frame().ticks(); uint64_t actual_rcb_count = t->tick_count(); if (trace_rcb_count_at_last_geteuid32 > 0 && !detected_cpuid_bug) { if (!rcb_counts_ok(trace_rcb_count_at_last_geteuid32, trace_rcb_count, "trace") || !rcb_counts_ok(actual_rcb_count_at_last_geteuid32, actual_rcb_count, "actual")) { detected_cpuid_bug = true; } } trace_rcb_count_at_last_geteuid32 = trace_rcb_count; actual_rcb_count_at_last_geteuid32 = actual_rcb_count; } rr-4.1.0/src/CPUIDBugDetector.h000066400000000000000000000027071265436462100161120ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_CPUID_BUG_DETECTOR_H_ #define RR_CPUID_BUG_DETECTOR_H_ #include class Task; /** * Helper to detect when the "CPUID can cause rcbs to be lost" bug is present. 
* See http://robert.ocallahan.org/2014/09/vmware-cpuid-conditional-branch.html * * This bug is caused by VMM optimizations described in * https://www.usenix.org/system/files/conference/atc12/atc12-final158.pdf * that cause instruction sequences related to CPUID to be optimized, * eliminating the user-space execution of a conditional branch between two * CPUID instructions (in some circumstances). */ class CPUIDBugDetector { public: CPUIDBugDetector() : trace_rcb_count_at_last_geteuid32(0), actual_rcb_count_at_last_geteuid32(0), detected_cpuid_bug(false) {} /** * Call this in the context of the first spawned process to run the * code that triggers the bug. */ static void run_detection_code(); /** * Call this when task t enters a traced syscall during replay. */ void notify_reached_syscall_during_replay(Task* t); /** * Returns true when the "CPUID can cause rcbs to be lost" bug has * been detected. */ bool is_cpuid_bug_detected() { return detected_cpuid_bug; } private: uint64_t trace_rcb_count_at_last_geteuid32; uint64_t actual_rcb_count_at_last_geteuid32; bool detected_cpuid_bug; }; #endif /* RR_CPUID_BUG_DETECTOR_H_ */ rr-4.1.0/src/Command.cc000066400000000000000000000101441265436462100146240ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #define _BSD_SOURCE #include "Command.h" #include #include #include #include "main.h" #include "TraceStream.h" using namespace std; bool ParsedOption::verify_valid_int(int64_t min, int64_t max) const { if (int_value < min || int_value > max) { return false; } return true; } static vector* command_list; Command::Command(const char* name, const char* help) : name(name), help(help) { if (!command_list) { command_list = new vector(); } command_list->push_back(this); } Command* Command::command_for_name(const std::string& name) { for (auto& it : *command_list) { if (strcmp(it->name, name.c_str()) == 0) { return it; } } return nullptr; } bool Command::less_than_by_name(Command* c1, Command* c2) { return strcmp(c1->name, c2->name) < 0; } void Command::print_help_all(FILE* out) { vector cmds; for (auto& it : *command_list) { if (!it->help) { continue; } cmds.push_back(it); } sort(cmds.begin(), cmds.end(), less_than_by_name); for (auto& it : cmds) { const char* c = strchr(it->help, '\n'); if (c) { fprintf(out, "%.*s\n", (int)(c - it->help), it->help); } else { fputs(it->help, out); } } } void Command::print_help(FILE* out) { if (help) { fputs(help, out); } else { print_usage(out); } } static bool consume_args(std::vector& args, size_t count) { args.erase(args.begin(), args.begin() + count); return true; } static void assign_param(ParsedOption* opt, const char* s) { opt->value = s; opt->int_value = INT64_MIN; if (!opt->value.empty()) { char* end; int64_t v = strtoll(s, &end, 10); if (*end == 0) { opt->int_value = v; } } } bool Command::parse_option(std::vector& args, const OptionSpec* option_specs, size_t count, ParsedOption* out) { if (args.size() == 0 || args[0][0] != '-') { return false; } for (size_t i = 0; i < count; ++i) { if (args[0][1] == option_specs[i].short_name) { out->short_name = option_specs[i].short_name; switch (option_specs[i].param) { case NO_PARAMETER: if (args[0][2] == 0) { return consume_args(args, 1); } return false; case HAS_PARAMETER: if (args[0][2] != 0) { assign_param(out, args[0].c_str() + 2); return consume_args(args, 1); } if (args.size() >= 2) { assign_param(out, args[1].c_str()); return consume_args(args, 2); } return false; default: assert(0 && "Unknown parameter type"); 
} } else if (args[0][1] == '-') { size_t equals = args[0].find('='); if (strncmp(args[0].c_str() + 2, option_specs[i].long_name, (equals == string::npos ? args[0].length() : equals) - 2) == 0) { out->short_name = option_specs[i].short_name; switch (option_specs[i].param) { case NO_PARAMETER: return consume_args(args, 1); case HAS_PARAMETER: if (equals == string::npos) { if (args.size() >= 2) { assign_param(out, args[1].c_str()); return consume_args(args, 2); } return false; } assign_param(out, args[0].c_str() + equals + 1); return consume_args(args, 1); default: assert(0 && "Unknown parameter type"); } } } } return false; } bool Command::verify_not_option(std::vector& args) { if (args.size() > 0 && args[0][0] == '-') { fprintf(stderr, "Invalid option %s\n", args[0].c_str()); return false; } return true; } bool Command::parse_optional_trace_dir(vector& args, string* out) { if (!verify_not_option(args)) { return false; } if (args.size() > 0) { *out = args[0]; args.erase(args.begin()); } else { *out = string(); } return true; } rr-4.1.0/src/Command.h000066400000000000000000000035031265436462100144670ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_COMMAND_H_ #define RR_COMMAND_H_ #ifndef _DEFAULT_SOURCE #define _DEFAULT_SOURCE 1 #endif #include #include #include #include #include class TraceReader; enum OptionParameters { NO_PARAMETER, HAS_PARAMETER }; struct OptionSpec { char short_name; const char* long_name; OptionParameters param; }; struct ParsedOption { char short_name; std::string value; int64_t int_value; bool verify_valid_int(int64_t min = INT64_MIN + 1, int64_t max = INT64_MAX) const; }; /** * rr command-line commands. Objects of this class must be static, since * they are expected to be immortal. */ class Command { public: static Command* command_for_name(const std::string& name); static void print_help_all(FILE* out); /* Runs the command with the given parameters. Returns an exit code. 
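A typical override consumes flags in a loop before reading positional
arguments -- a sketch modeled on DumpCommand::run() elsewhere in this
tree (the MyCommand/MyFlags/parse_my_arg names are illustrative):

  int MyCommand::run(std::vector<std::string>& args) {
    MyFlags flags;
    while (parse_my_arg(args, flags)) {
    }
    std::string trace_dir;
    if (!parse_optional_trace_dir(args, &trace_dir)) {
      print_help(stderr);
      return 1;
    }
    return do_my_work(trace_dir, flags) ? 0 : 1;
  }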
*/ virtual int run(std::vector& args) = 0; void print_help(FILE* out); static bool verify_not_option(std::vector& args); static bool parse_optional_trace_dir(std::vector& args, std::string* out); static bool parse_option(std::vector& args, const OptionSpec* option_specs, size_t count, ParsedOption* out); template static bool parse_option(std::vector& args, const OptionSpec(&option_specs)[N], ParsedOption* out) { return parse_option(args, option_specs, N, out); } protected: Command(const char* name, const char* help); static bool less_than_by_name(Command* c1, Command* c2); const char* name; const char* help; }; #endif // RR_COMMAND_H_ rr-4.1.0/src/CompressedReader.cc000066400000000000000000000104361265436462100165010ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "CompressedReader" #define _LARGEFILE64_SOURCE #include "CompressedReader.h" #include #include #include #include #include #include #include #include #include "CompressedWriter.h" CompressedReader::CompressedReader(const std::string& filename) : fd(new ScopedFd(filename.c_str(), O_CLOEXEC | O_RDONLY | O_LARGEFILE)) { fd_offset = 0; error = !fd->is_open(); eof = false; buffer_read_pos = 0; have_saved_state = false; } CompressedReader::CompressedReader(const CompressedReader& other) { fd = other.fd; fd_offset = other.fd_offset; error = other.error; eof = other.eof; buffer_read_pos = other.buffer_read_pos; buffer = other.buffer; have_saved_state = false; assert(!other.have_saved_state); } CompressedReader::~CompressedReader() { close(); } static bool read_all(const ScopedFd& fd, size_t size, void* data, uint64_t* offset) { while (size > 0) { ssize_t result = pread(fd, data, size, *offset); if (result <= 0) { return false; } size -= result; data = static_cast(data) + result; *offset += result; } return true; } static bool do_decompress(std::vector& compressed, std::vector& uncompressed) { z_stream stream; memset(&stream, 0, sizeof(stream)); int result = inflateInit(&stream); if (result != Z_OK) { assert(0 && "inflateInit failed!"); return false; } stream.next_in = &compressed[0]; stream.avail_in = compressed.size(); stream.next_out = &uncompressed[0]; stream.avail_out = uncompressed.size(); result = inflate(&stream, Z_FINISH); if (result != Z_STREAM_END) { assert(0 && "inflate failed!"); return false; } result = inflateEnd(&stream); if (result != Z_OK) { assert(0 && "inflateEnd failed!"); return false; } return true; } bool CompressedReader::read(void* data, size_t size) { while (size > 0) { if (error) { return false; } if (buffer_read_pos < buffer.size()) { size_t amount = std::min(size, buffer.size() - buffer_read_pos); memcpy(data, &buffer[buffer_read_pos], amount); size -= amount; data = static_cast(data) + amount; buffer_read_pos += amount; continue; } if (have_saved_state && !have_saved_buffer) { std::swap(buffer, saved_buffer); have_saved_buffer = true; } CompressedWriter::BlockHeader header; if (!read_all(*fd, sizeof(header), &header, &fd_offset)) { error = true; return false; } std::vector compressed_buf; compressed_buf.resize(header.compressed_length); if (!read_all(*fd, compressed_buf.size(), &compressed_buf[0], &fd_offset)) { error = true; return false; } char ch; if (pread(*fd, &ch, 1, fd_offset) == 0) { eof = true; } buffer.resize(header.uncompressed_length); buffer_read_pos = 0; if (!do_decompress(compressed_buf, buffer)) { error = true; return false; } } return true; } void CompressedReader::rewind() { assert(!have_saved_state); fd_offset = 
0; buffer_read_pos = 0; buffer.clear(); eof = false; } void CompressedReader::close() { fd = nullptr; } void CompressedReader::save_state() { assert(!have_saved_state); have_saved_state = true; have_saved_buffer = false; saved_fd_offset = fd_offset; saved_buffer_read_pos = buffer_read_pos; } void CompressedReader::restore_state() { assert(have_saved_state); have_saved_state = false; if (saved_fd_offset < fd_offset) { eof = false; } fd_offset = saved_fd_offset; if (have_saved_buffer) { std::swap(buffer, saved_buffer); saved_buffer.clear(); } buffer_read_pos = saved_buffer_read_pos; } uint64_t CompressedReader::uncompressed_bytes() const { uint64_t offset = 0; uint64_t uncompressed_bytes = 0; CompressedWriter::BlockHeader header; while (read_all(*fd, sizeof(header), &header, &offset)) { uncompressed_bytes += header.uncompressed_length; offset += header.compressed_length; } return uncompressed_bytes; } uint64_t CompressedReader::compressed_bytes() const { return lseek(*fd, 0, SEEK_END); } rr-4.1.0/src/CompressedReader.h000066400000000000000000000044371265436462100163470ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_COMPRESSED_READER_H_ #define RR_COMPRESSED_READER_H_ #include #include #include #include #include #include "ScopedFd.h" /** * CompressedReader opens an input file written by CompressedWriter * and reads data from it. Currently data is decompressed by the thread that * calls read(). */ class CompressedReader { public: CompressedReader(const std::string& filename); CompressedReader(const CompressedReader& aOther); ~CompressedReader(); bool good() const { return !error; } bool at_end() const { return eof && buffer_read_pos == buffer.size(); } // Returns true if successful. Otherwise there's an error and good() // will be false. bool read(void* data, size_t size); void rewind(); void close(); /** * Save the current position. Nested saves are not allowed. */ void save_state(); /** * Restore previously saved position. */ void restore_state(); /** * Gathers stats on the file stream. These are independent of what's * actually been read. */ uint64_t uncompressed_bytes() const; uint64_t compressed_bytes() const; template <typename T> CompressedReader& operator>>(T& value) { read(&value, sizeof(value)); return *this; } CompressedReader& operator>>(std::string& value) { value.clear(); while (true) { char ch; read(&ch, 1); if (ch == 0) { break; } value.append(1, ch); } return *this; } template <typename T> CompressedReader& operator>>(std::vector<T>& value) { size_t len; *this >> len; value.resize(0); for (size_t i = 0; i < len; ++i) { T v; *this >> v; value.push_back(v); } return *this; } protected: /* Our fd might be the dup of another fd, so we can't rely on its current file position. Instead track the current position in fd_offset and use pread.
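(Added for clarity: dup()'d descriptors share a single open file
description and thus a single file offset, so a plain read() here could
race with the fd's other user; pread() takes an explicit offset and never
moves the shared one.)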
*/ uint64_t fd_offset; std::shared_ptr fd; bool error; bool eof; std::vector buffer; size_t buffer_read_pos; bool have_saved_state; bool have_saved_buffer; uint64_t saved_fd_offset; std::vector saved_buffer; size_t saved_buffer_read_pos; }; #endif /* RR_COMPRESSED_READER_H_ */ rr-4.1.0/src/CompressedWriter.cc000066400000000000000000000165261265436462100165610ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "CompressedWriter" #define _LARGEFILE64_SOURCE #include "CompressedWriter.h" #include #include #include #include #include #include #include #include using namespace std; void* CompressedWriter::compression_thread_callback(void* p) { static_cast(p)->compression_thread(); return nullptr; } CompressedWriter::CompressedWriter(const string& filename, size_t block_size, uint32_t num_threads) : fd(filename.c_str(), O_CLOEXEC | O_WRONLY | O_CREAT | O_EXCL | O_LARGEFILE, 0400) { this->block_size = block_size; threads.resize(num_threads); thread_pos.resize(num_threads); buffer.resize(block_size * (num_threads + 2)); pthread_mutex_init(&mutex, nullptr); pthread_cond_init(&cond, nullptr); for (uint32_t i = 0; i < num_threads; ++i) { thread_pos[i] = UINT64_MAX; } next_thread_pos = 0; next_thread_end_pos = 0; closing = false; write_error = false; producer_reserved_pos = 0; producer_reserved_write_pos = 0; producer_reserved_upto_pos = 0; error = false; if (fd < 0) { error = true; return; } // Hold the lock so threads don't inspect the 'threads' array // until we've finished initializing it. pthread_mutex_lock(&mutex); for (uint32_t i = 0; i < num_threads; ++i) { pthread_create(&threads[i], nullptr, compression_thread_callback, this); size_t last_slash = filename.rfind('/'); string thread_name = string("compress ") + (last_slash == string::npos ? filename : filename.substr(last_slash + 1)); pthread_setname_np(threads[i], thread_name.substr(0, 15).c_str()); } pthread_mutex_unlock(&mutex); } CompressedWriter::~CompressedWriter() { close(); pthread_mutex_destroy(&mutex); pthread_cond_destroy(&cond); } void CompressedWriter::write(const void* data, size_t size) { while (!error && size > 0) { uint64_t reservation_size = producer_reserved_upto_pos - producer_reserved_write_pos; if (reservation_size == 0) { update_reservation(WAIT); continue; } size_t buf_offset = (size_t)(producer_reserved_write_pos % buffer.size()); size_t amount = min(buffer.size() - buf_offset, (size_t)min(reservation_size, size)); memcpy(&buffer[buf_offset], data, amount); producer_reserved_write_pos += amount; data = static_cast(data) + amount; size -= amount; } if (!error && producer_reserved_write_pos - producer_reserved_pos >= buffer.size() / 2) { update_reservation(NOWAIT); } } void CompressedWriter::update_reservation(WaitFlag wait_flag) { pthread_mutex_lock(&mutex); next_thread_end_pos = producer_reserved_write_pos; producer_reserved_pos = producer_reserved_write_pos; // Wake up threads that might be waiting to consume data. 
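// (Note, added for clarity: one condition variable is shared by the
// producer and all compressor threads, each waiting on a different
// predicate, so we must broadcast rather than signal -- a lone signal
// could wake a thread whose predicate is still false while the thread
// that could make progress keeps sleeping.)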
pthread_cond_broadcast(&cond); while (!error) { if (write_error) { error = true; break; } uint64_t completed_pos = next_thread_pos; for (uint32_t i = 0; i < thread_pos.size(); ++i) { completed_pos = min(completed_pos, thread_pos[i]); } producer_reserved_upto_pos = completed_pos + buffer.size(); if (producer_reserved_pos < producer_reserved_upto_pos || wait_flag == NOWAIT) { break; } pthread_cond_wait(&cond, &mutex); } pthread_mutex_unlock(&mutex); } void CompressedWriter::compression_thread() { pthread_mutex_lock(&mutex); int thread_index; pthread_t self = pthread_self(); for (thread_index = 0; threads[thread_index] != self; ++thread_index) { } // Add slop for incompressible data vector outputbuf; outputbuf.resize((size_t)(block_size * 1.1) + sizeof(BlockHeader)); BlockHeader* header = reinterpret_cast(&outputbuf[0]); while (true) { if (!write_error && next_thread_pos < next_thread_end_pos && (closing || next_thread_pos + block_size <= next_thread_end_pos)) { thread_pos[thread_index] = next_thread_pos; next_thread_pos = min(next_thread_end_pos, next_thread_pos + block_size); // header->uncompressed_length must be <= block_size, // therefore fits in a size_t. header->uncompressed_length = (size_t)(next_thread_pos - thread_pos[thread_index]); pthread_mutex_unlock(&mutex); header->compressed_length = do_compress(thread_pos[thread_index], header->uncompressed_length, &outputbuf[sizeof(BlockHeader)], outputbuf.size() - sizeof(BlockHeader)); pthread_mutex_lock(&mutex); if (header->compressed_length == 0) { write_error = true; } // wait until we're the next thread that needs to write while (!write_error) { bool other_thread_write_first = false; for (uint32_t i = 0; i < thread_pos.size(); ++i) { if (thread_pos[i] < thread_pos[thread_index]) { other_thread_write_first = true; } } if (!other_thread_write_first) { break; } pthread_cond_wait(&cond, &mutex); } if (!write_error) { pthread_mutex_unlock(&mutex); ::write(fd, &outputbuf[0], sizeof(BlockHeader) + header->compressed_length); pthread_mutex_lock(&mutex); } thread_pos[thread_index] = UINT64_MAX; // do a broadcast because we might need to unblock // the producer thread or a compressor thread waiting // for us to write. pthread_cond_broadcast(&cond); continue; } if (closing && (write_error || next_thread_pos == next_thread_end_pos)) { break; } pthread_cond_wait(&cond, &mutex); } pthread_mutex_unlock(&mutex); } void CompressedWriter::close() { if (!fd.is_open()) { return; } update_reservation(NOWAIT); pthread_mutex_lock(&mutex); closing = true; pthread_cond_broadcast(&cond); pthread_mutex_unlock(&mutex); for (auto i = threads.begin(); i != threads.end(); ++i) { pthread_join(*i, nullptr); } fd.close(); } size_t CompressedWriter::do_compress(uint64_t offset, size_t length, uint8_t* outputbuf, size_t outputbuf_len) { z_stream stream; memset(&stream, 0, sizeof(stream)); int result = deflateInit(&stream, Z_DEFAULT_COMPRESSION); if (result != Z_OK) { assert(0 && "deflateInit failed!"); return 0; } stream.next_out = outputbuf; stream.avail_out = outputbuf_len; while (length > 0 || stream.avail_in > 0) { if (stream.avail_in == 0) { size_t buf_offset = (size_t)(offset % buffer.size()); size_t amount = min(length, buffer.size() - buf_offset); stream.next_in = &buffer[buf_offset]; stream.avail_in = amount; length -= amount; offset += amount; } if (stream.avail_out == 0) { assert(0 && "outputbuf exhausted!"); return 0; } result = deflate(&stream, length == 0 ? Z_FINISH : Z_NO_FLUSH); if (result != (length == 0 ? 
Z_STREAM_END : Z_OK)) { assert(0 && "deflate failed!"); return 0; } } result = deflateEnd(&stream); if (result != Z_OK) { assert(0 && "deflateEnd failed!"); return 0; } return stream.total_out; } rr-4.1.0/src/CompressedWriter.h000066400000000000000000000061121265436462100164110ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_COMPRESSED_WRITER_H_ #define RR_COMPRESSED_WRITER_H_ #include #include #include #include #include #include "ScopedFd.h" /** * CompressedWriter opens an output file and writes compressed blocks to it. * Blocks of a fixed but unspecified size (currently 1MB) are compressed. * Each block of compressed data is written to the file preceded by two * 32-bit words: the size of the compressed data (excluding block header) * and the size of the uncompressed data, in that order. See BlockHeader below. * * We use multiple threads to perform compression. The threads are * responsible for the actual data writes. The thread that creates the * CompressedWriter is the "producer" thread and must also be the caller of * 'write'. The producer thread may block in 'write' if 'buffer_size' bytes are * being compressed. * * Each data block is compressed independently using zlib. */ class CompressedWriter { public: CompressedWriter(const std::string& filename, size_t buffer_size, uint32_t num_threads); ~CompressedWriter(); // Call only on producer thread bool good() const { return !error; } // Call only on producer thread. void write(const void* data, size_t size); // Call only on producer thread void close(); struct BlockHeader { uint32_t compressed_length; uint32_t uncompressed_length; }; template CompressedWriter& operator<<(const T& value) { write(&value, sizeof(value)); return *this; } CompressedWriter& operator<<(const std::string& value) { write(value.c_str(), value.size() + 1); return *this; } template CompressedWriter& operator<<(const std::vector& value) { *this << value.size(); for (auto& i : value) { *this << i; } return *this; } protected: enum WaitFlag { WAIT, NOWAIT }; void update_reservation(WaitFlag wait_flag); static void* compression_thread_callback(void* p); void compression_thread(); size_t do_compress(uint64_t offset, size_t length, uint8_t* outputbuf, size_t outputbuf_len); // Immutable while threads are running ScopedFd fd; int block_size; pthread_mutex_t mutex; pthread_cond_t cond; std::vector threads; // Carefully shared... 
std::vector<uint8_t> buffer; // BEGIN protected by 'mutex' /* position in output stream that this thread is currently working on, * or UINT64_MAX if it's idle */ std::vector<uint64_t> thread_pos; /* position in output stream of data to dispatch to next thread */ uint64_t next_thread_pos; /* position in output stream of end of data ready to dispatch */ uint64_t next_thread_end_pos; bool closing; bool write_error; // END protected by 'mutex' /* producer thread only */ /* Areas in the buffer that have been reserved for write() */ uint64_t producer_reserved_pos; uint64_t producer_reserved_write_pos; uint64_t producer_reserved_upto_pos; bool error; }; #endif /* RR_COMPRESSED_WRITER_H_ */ rr-4.1.0/src/DiversionSession.cc000066400000000000000000000116731265436462100165640ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "DiversionSession" #include "DiversionSession.h" #include "AutoRemoteSyscalls.h" #include "log.h" #include "ReplaySession.h" using namespace rr; DiversionSession::DiversionSession(const ReplaySession& other) : emu_fs(other.emufs().clone()) {} DiversionSession::~DiversionSession() { // We won't permanently leak any OS resources by not ensuring // we've cleaned up here, but sessions can be created and // destroyed many times, and we don't want to temporarily hog // resources. kill_all_tasks(); assert(tasks().size() == 0 && vms().size() == 0); emu_fs->gc(*this); assert(emu_fs->size() == 0); } static void finish_emulated_syscall_with_ret(Task* t, long ret) { t->finish_emulated_syscall(); Registers r = t->regs(); r.set_syscall_result(ret); t->set_regs(r); } /** * Execute the syscall contained in |t|'s current register set. The * return value of the syscall is set for |t|'s registers, to be * returned to the tracee task. */ static void execute_syscall(Task* t) { t->finish_emulated_syscall(); AutoRemoteSyscalls remote(t); remote.syscall(remote.regs().original_syscallno(), remote.regs().arg1(), remote.regs().arg2(), remote.regs().arg3(), remote.regs().arg4(), remote.regs().arg5(), remote.regs().arg6()); remote.regs().set_syscall_result(t->regs().syscall_result()); } template <typename Arch> static void process_syscall_arch(Task* t, int syscallno) { LOG(debug) << "Processing " << t->syscall_name(syscallno); switch (syscallno) { // The arm/disarm-desched ioctls are emulated as no-ops. // However, because the rr preload library expects these // syscalls to succeed and aborts if they don't, we fudge a // "0" return value. case Arch::ioctl: if (!t->is_desched_event_syscall()) { break; } finish_emulated_syscall_with_ret(t, 0); return; // We blacklist these syscalls because the params include // namespaced identifiers that are different in replay than // recording, and during replay they may refer to different, // live resources. For example, if a recorded tracee kills // one of its threads, then during replay that killed pid // might refer to a live process outside the tracee tree. We // don't want diversion tracees randomly shooting down other // processes! // // We optimistically assume that filesystem operations were // intended by the user. // // There's a potential problem with "fd confusion": in the // diversion tasks, fds returned from open() during replay are // emulated. But those fds may accidentally refer to live fds // in the task fd table. So write()s etc may not be writing // to the file the tracee expects. However, the only real fds
However, the only real fds // that leak into tracees are the stdio fds, and there's not // much harm that can be caused by accidental writes to them. case Arch::ipc: case Arch::kill: case Arch::rt_sigqueueinfo: case Arch::rt_tgsigqueueinfo: case Arch::tgkill: case Arch::tkill: return; } return execute_syscall(t); } static void process_syscall(Task* t, int syscallno) { RR_ARCH_FUNCTION(process_syscall_arch, t->arch(), t, syscallno) } /** * Advance execution until either a signal is received (including a SIGTRAP * generated by a single-step) or a syscall is made. */ DiversionSession::DiversionResult DiversionSession::diversion_step( Task* t, RunCommand command, int signal_to_deliver) { assert(command != RUN_SINGLESTEP_FAST_FORWARD); assert_fully_initialized(); DiversionResult result; // An exit might have occurred while processing a previous syscall. if (t->ptrace_event() == PTRACE_EVENT_EXIT) { result.status = DIVERSION_EXITED; return result; } if (t->syscallbuf_hdr) { // Disable syscall buffering during diversions t->syscallbuf_hdr->locked = 1; } switch (command) { case RUN_CONTINUE: LOG(debug) << "Continuing to next syscall"; t->resume_execution(RESUME_SYSEMU, RESUME_WAIT, RESUME_UNLIMITED_TICKS, signal_to_deliver); break; case RUN_SINGLESTEP: t->resume_execution(RESUME_SYSEMU_SINGLESTEP, RESUME_WAIT, RESUME_UNLIMITED_TICKS, signal_to_deliver); LOG(debug) << "Stepping to next insn/syscall"; break; default: FATAL() << "Illegal run command " << command; } if (t->ptrace_event() == PTRACE_EVENT_EXIT) { result.status = DIVERSION_EXITED; return result; } result.status = DIVERSION_CONTINUE; if (t->pending_sig()) { result.break_status = diagnose_debugger_trap(t); ASSERT(t, !result.break_status.singlestep_complete || command == RUN_SINGLESTEP); return result; } process_syscall(t, t->regs().original_syscallno()); check_for_watchpoint_changes(t, result.break_status); return result; } rr-4.1.0/src/DiversionSession.h000066400000000000000000000036561265436462100164300ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_DIVERSION_SESSION_H_ #define RR_DIVERSION_SESSION_H_ #include "EmuFs.h" #include "Session.h" class ReplaySession; /** * A DiversionSession lets you run task(s) forward without replay. * Clone a ReplaySession to a DiversionSession to execute some arbitrary * code for its side effects. * * Diversion allows tracees to execute freely, as in "recorder" * mode, but doesn't attempt to record any data. Diverter * emulates the syscalls it's able to (such as writes to stdio fds), * and essentially ignores the syscalls it doesn't know how to * implement. Tracees can easily get into inconsistent states within * diversion mode, and no attempt is made to detect or rectify that. * * Diverter mode is designed to support short-lived diversions from * "replayer" sessions, as required to support gdb's |call foo()| * feature. A diversion is created for the call frame, then discarded * when the call finishes (loosely speaking). */ class DiversionSession : public Session { public: typedef std::shared_ptr shr_ptr; ~DiversionSession(); EmuFs& emufs() const { return *emu_fs; } enum DiversionStatus { // Some execution was done. diversion_step() can be called again. DIVERSION_CONTINUE, // All tracees are dead. diversion_step() should not be called again. DIVERSION_EXITED }; struct DiversionResult { DiversionStatus status; BreakStatus break_status; }; /** * Try make progress in this diversion session. Run task t if possible. 
*/ DiversionResult diversion_step(Task* t, RunCommand command = RUN_CONTINUE, int signal_to_deliver = 0); virtual DiversionSession* as_diversion() { return this; } private: friend class ReplaySession; DiversionSession(const ReplaySession& other); std::shared_ptr emu_fs; }; #endif // RR_DIVERSION_SESSION_H_ rr-4.1.0/src/DumpCommand.cc000066400000000000000000000202551265436462100154560ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include #include #include #include "preload/preload_interface.h" #include "AddressSpace.h" #include "Command.h" #include "kernel_metadata.h" #include "main.h" #include "TraceStream.h" #include "util.h" using namespace std; class DumpCommand : public Command { public: virtual int run(std::vector& args); protected: DumpCommand(const char* name, const char* help) : Command(name, help) {} static DumpCommand singleton; }; DumpCommand DumpCommand::singleton( "dump", " rr dump [OPTIONS] [] [...]\n" " Event specs can be either an event number like `127', or a range\n" " like `1000-5000'. By default, all events are dumped.\n" " -b, --syscallbuf dump syscallbuf contents\n" " -m, --recorded-metadata dump recorded data metadata\n" " -p, --mmaps dump mmap data\n" " -r, --raw dump trace frames in a more easily\n" " machine-parseable format instead of the\n" " default human-readable format\n" " -s, --statistics dump statistics about the trace\n"); struct DumpFlags { bool dump_syscallbuf; bool dump_recorded_data_metadata; bool dump_mmaps; bool raw_dump; bool dump_statistics; DumpFlags() : dump_syscallbuf(false), dump_recorded_data_metadata(false), dump_mmaps(false), raw_dump(false), dump_statistics(false) {} }; static bool parse_dump_arg(std::vector& args, DumpFlags& flags) { if (parse_global_option(args)) { return true; } static const OptionSpec options[] = { { 'b', "syscallbuf", NO_PARAMETER }, { 'm', "recorded-metadata", NO_PARAMETER }, { 'p', "mmaps", NO_PARAMETER }, { 'r', "raw", NO_PARAMETER }, { 's', "statistics", NO_PARAMETER } }; ParsedOption opt; if (!Command::parse_option(args, options, &opt)) { return false; } switch (opt.short_name) { case 'b': flags.dump_syscallbuf = true; break; case 'm': flags.dump_recorded_data_metadata = true; break; case 'p': flags.dump_mmaps = true; break; case 'r': flags.raw_dump = true; break; case 's': flags.dump_statistics = true; break; default: assert(0 && "Unknown option"); } return true; } static void dump_syscallbuf_data(TraceReader& trace, FILE* out, const TraceFrame& frame) { if (frame.event().type() != EV_SYSCALLBUF_FLUSH) { return; } auto buf = trace.read_raw_data(); size_t bytes_remaining = buf.data.size() - sizeof(struct syscallbuf_hdr); auto flush_hdr = reinterpret_cast(buf.data.data()); if (flush_hdr->num_rec_bytes > bytes_remaining) { fprintf(stderr, "Malformed trace file (bad recorded-bytes count)\n"); abort(); } bytes_remaining = flush_hdr->num_rec_bytes; auto record_ptr = reinterpret_cast(flush_hdr + 1); auto end_ptr = record_ptr + bytes_remaining; while (record_ptr < end_ptr) { auto record = reinterpret_cast(record_ptr); fprintf(out, " { syscall:'%s', ret:0x%lx, size:0x%lx }\n", syscall_name(record->syscallno, frame.event().arch()).c_str(), (long)record->ret, (long)record->size); if (record->size < sizeof(*record)) { fprintf(stderr, "Malformed trace file (bad record size)\n"); abort(); } record_ptr += stored_record_size(record->size); } } /** * Dump all events from the current to trace that match |spec| to * |out|. 
|spec| has the following syntax: /\d+(-\d+)?/, expressing * either a single event number or a range, and may be null to * indicate "dump all events". * * This function is side-effect-y, in that the trace file isn't * rewound in between matching each spec. Therefore specs should be * constructed so as to match properly on a serial linear scan; that * is, they should comprise disjoint and monotonically increasing * event sets. No attempt is made to enforce this or normalize specs. */ static void dump_events_matching(TraceReader& trace, const DumpFlags& flags, FILE* out, const string* spec) { uint32_t start = 0, end = numeric_limits<uint32_t>::max(); // Try to parse the "range" syntax '[start]-[end]'. if (spec && 2 > sscanf(spec->c_str(), "%u-%u", &start, &end)) { // Fall back on assuming the spec is a single event // number, however it parses out with atoi(). start = end = atoi(spec->c_str()); } bool process_raw_data = flags.dump_syscallbuf || flags.dump_recorded_data_metadata; while (!trace.at_end()) { auto frame = trace.read_frame(); if (end < frame.time()) { return; } if (start <= frame.time() && frame.time() <= end) { if (flags.raw_dump) { frame.dump_raw(out); } else { frame.dump(out); } if (flags.dump_syscallbuf) { dump_syscallbuf_data(trace, out, frame); } while (true) { TraceReader::MappedData data; bool found; KernelMapping km = trace.read_mapped_region(&data, &found); if (!found) { break; } if (flags.dump_mmaps) { char prot_flags[] = "rwxp"; if (!(km.prot() & PROT_READ)) { prot_flags[0] = '-'; } if (!(km.prot() & PROT_WRITE)) { prot_flags[1] = '-'; } if (!(km.prot() & PROT_EXEC)) { prot_flags[2] = '-'; } if (km.flags() & MAP_SHARED) { prot_flags[3] = 's'; } fprintf(out, " { map_file:\"%s\", addr:%p, length:%p, " "prot_flags:\"%s\", file_offset:0x%llx }\n", km.fsname().c_str(), (void*)km.start().as_int(), (void*)km.size(), prot_flags, (long long)km.file_offset_bytes()); } } TraceReader::RawData data; while (process_raw_data && trace.read_raw_data_for_frame(frame, data)) { if (flags.dump_recorded_data_metadata) { fprintf(out, " { addr:%p, length:%p }\n", (void*)data.addr.as_int(), (void*)data.data.size()); } } if (!flags.raw_dump) { fprintf(out, "}\n"); } } else { TraceReader::RawData data; while (process_raw_data && trace.read_raw_data_for_frame(frame, data)) { } while (true) { TraceReader::MappedData data; KernelMapping km = trace.read_mapped_region(&data); if (km.size() == 0) { break; } } } } } static void dump_statistics(const TraceReader& trace, FILE* out) { uint64_t uncompressed = trace.uncompressed_bytes(); uint64_t compressed = trace.compressed_bytes(); fprintf(out, "// Uncompressed bytes %" PRIu64 ", compressed bytes %" PRIu64 ", ratio %.2fx\n", uncompressed, compressed, double(uncompressed) / compressed); } static void dump(const string& trace_dir, const DumpFlags& flags, const vector<string>& specs, FILE* out) { TraceReader trace(trace_dir); if (flags.raw_dump) { fprintf(out, "global_time tid reason ticks " "hw_interrupts page_faults instructions " "eax ebx ecx edx esi edi ebp orig_eax esp eip eflags\n"); } if (specs.size() > 0) { for (size_t i = 0; i < specs.size(); ++i) { dump_events_matching(trace, flags, stdout, &specs[i]); } } else { // No specs => dump all events.
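// (With a null spec, dump_events_matching's defaults of start=0 and // end=UINT32_MAX already cover every event.)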
dump_events_matching(trace, flags, stdout, nullptr /*all events*/); } if (flags.dump_statistics) { dump_statistics(trace, stdout); } } int DumpCommand::run(std::vector& args) { DumpFlags flags; while (parse_dump_arg(args, flags)) { } string trace_dir; if (!parse_optional_trace_dir(args, &trace_dir)) { print_help(stderr); return 1; } dump(trace_dir, flags, args, stdout); return 0; } rr-4.1.0/src/EmuFs.cc000066400000000000000000000156431265436462100142760ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "EmuFs" #include "EmuFs.h" #include #include #include #include #include "kernel_abi.h" #include "kernel_metadata.h" #include "log.h" #include "ReplaySession.h" using namespace rr; using namespace std; static void replace_char(string& s, char c, char replacement) { size_t i; while (string::npos != (i = s.find(c))) { s[i] = replacement; } } EmuFile::~EmuFile() { LOG(debug) << " EmuFs::~File(einode:" << inode_ << ")"; } EmuFile::shr_ptr EmuFile::clone() { auto f = EmuFile::create(orig_path.c_str(), device(), inode(), size_); // NB: this isn't the most efficient possible file copy, but // it's simple and not too slow. ifstream src(proc_path(), ifstream::binary); ofstream dst(f->proc_path(), ofstream::binary); dst << src.rdbuf(); return f; } string EmuFile::proc_path() const { stringstream ss; ss << "/proc/" << getpid() << "/fd/" << fd().get(); return ss.str(); } void EmuFile::update(dev_t device, ino_t inode, uint64_t size) { assert(device_ == device && inode_ == inode); if (size_ != size) { resize_shmem_segment(file, size); } size_ = size; } /*static*/ EmuFile::shr_ptr EmuFile::create(const string& orig_path, dev_t orig_device, ino_t orig_inode, uint64_t orig_file_size) { // Sanitize the mapped file path so that we can use it in a // leaf name. string path_tag(orig_path); replace_char(path_tag, '/', '\\'); stringstream name; name << SHMEM_FS << "/rr-emufs-" << getpid() << "-dev-" << orig_device << "-inode-" << orig_inode << "-" << path_tag; string real_name = name.str().substr(0, 255); ScopedFd fd = open(real_name.c_str(), O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0600); if (!fd.is_open()) { FATAL() << "Failed to create shmem segment " << real_name; } /* Remove the fs name so that we don't have to worry about * cleaning up this segment in error conditions. */ unlink(real_name.c_str()); resize_shmem_segment(fd, orig_file_size); shr_ptr f(new EmuFile(std::move(fd), orig_path, real_name, orig_device, orig_inode, orig_file_size)); LOG(debug) << "created emulated file for " << orig_path << " as " << name.str(); return f; } EmuFile::EmuFile(ScopedFd&& fd, const string& orig_path, const string& real_path, dev_t orig_device, ino_t orig_inode, uint64_t orig_file_size) : orig_path(orig_path), tmp_path(real_path), file(std::move(fd)), size_(orig_file_size), device_(orig_device), inode_(orig_inode), is_marked(false) {} EmuFile::shr_ptr EmuFs::at(const KernelMapping& recorded_map) const { return files.at(FileId(recorded_map)); } bool EmuFs::has_file_for(const KernelMapping& recorded_map) const { return files.find(FileId(recorded_map)) != files.end(); } EmuFs::shr_ptr EmuFs::clone() { shr_ptr fs(new EmuFs()); for (auto& kv : files) { const FileId& id = kv.first; fs->files[id] = kv.second->clone(); } return fs; } void EmuFs::gc(const Session& session) { // XXX this implementation is unnecessarily slow. 
But before // throwing it away for something different, give it another // shot once rr is caching local mmaps for all address spaces, // which obviates the need for the yucky, slow maps parsing // here. LOG(debug) << "Beginning emufs gc of " << files.size() << " files"; // Mark in-use files by iterating through the mmaps of all // tracee address spaces. // // We inject these maps into the tracee and are careful to // close the injected fd after we finish the mmap. That means // that the only way tracees can hold a reference to the // underlying inode is through a memory mapping. So to // determine if a file is in use, we only have to find a // recognizable filename in some tracee's memory map. // // We check *all* tracee file tables because tracees can share // fds with each other in many ways, and we don't attempt to // track any of that. // // TODO: assuming AddressSpace == FileTable, but technically // they're different things: two tracees could share an // address space but have different file tables. size_t nr_marked_files = 0; for (auto& as : session.vms()) { Task* t = *as->task_set().begin(); LOG(debug) << " iterating /proc/" << t->tid << "/maps ..."; mark_used_vfiles(t, *as, &nr_marked_files); if (files.size() == nr_marked_files) { break; } } // Sweep all the virtual files that weren't marked. It might // be possible that a later task will mmap the same underlying // file that we're about to destroy. That's perfectly fine; // we'll just create it anew, and restore its addressable // contents from the snapshot saved to the trace. Since there // are no live references to the file in the interim, tracees // can't observe the destroy/recreate operation. vector<FileId> garbage; for (auto it = files.begin(); it != files.end(); ++it) { if (!it->second->marked()) { garbage.push_back(it->first); } it->second->unmark(); } for (auto it = garbage.begin(); it != garbage.end(); ++it) { LOG(debug) << " emufs gc reclaiming einode:" << it->inode << "; fs name `" << files[*it]->emu_path() << "'"; files.erase(*it); } } EmuFile::shr_ptr EmuFs::get_or_create(const KernelMapping& recorded_km, uint64_t file_size) { FileId id(recorded_km); auto it = files.find(id); if (it != files.end()) { it->second->update(recorded_km.device(), recorded_km.inode(), file_size); return it->second; } auto vf = EmuFile::create(recorded_km.fsname(), recorded_km.device(), recorded_km.inode(), file_size); files[id] = vf; return vf; } void EmuFs::log() const { LOG(error) << "EmuFs " << this << " with " << files.size() << " files:"; for (auto& kv : files) { auto file = kv.second; LOG(error) << " " << file->emu_path(); } } /*static*/ EmuFs::shr_ptr EmuFs::create() { return shr_ptr(new EmuFs()); } EmuFs::EmuFs() {} void EmuFs::mark_used_vfiles(Task* t, const AddressSpace& as, size_t* nr_marked_files) { for (auto m : as.maps()) { LOG(debug) << " examining " << m.map.fsname().c_str() << " ..."; FileId id(m.recorded_map); auto id_ef = files.find(id); if (id_ef == files.end()) { // Mapping isn't relevant. Not all shared mappings get EmuFs entries // (e.g. readonly shared mappings of certain system files, like fonts).
continue; } auto ef = id_ef->second; if (!ef->marked()) { ef->mark(); LOG(debug) << " marked einode:" << id.inode; ++*nr_marked_files; if (files.size() == *nr_marked_files) { LOG(debug) << " (marked all files, bailing)"; return; } } } } rr-4.1.0/src/EmuFs.h000066400000000000000000000140611265436462100141310ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_EMUFS_H_ #define RR_EMUFS_H_ #include #include #include #include #include "AddressSpace.h" #include "ScopedFd.h" #include "task.h" class ReplaySession; class Session; class Task; /** * Implement an "emulated file system" consisting of files that were * mmap'd shared during recording. These files require special * treatment because (i) they were most likely modified during * recording, so (ii) the original file contents only exist as * snapshots in the trace, but (iii) all mappings of the file must * point at the same underlying resource, so that modifications are * seen by all mappees. * * The rr EmuFs creates "emulated files" in shared memory during * replay. Each efile is uniquely identified at a given event in the * trace by |(edev, einode)| (i.e., the recorded device ID and inode). * "What about inode recycling", you're probably thinking to yourself. * This scheme can cope with inode recycling, given a very important * assumption discussed below. * * Why is inode recycling not a problem? Assume that an mmap'd file * F_0 at trace time t_0 has the same (device, inode) ID as a * different file F_1 at trace time t_1. By definition, if the inode * ID was recycled in [t_0, t_1), then all references to F_0 must have * been dropped in that interval. A corollary of that is that all * memory mappings of F_0 must have been fully unmapped in the * interval. As per the first long comment in |gc()| below, an * emulated file can only be "live" during replay if some tracee still * has a mapping of it. Tracees' mappings of emulated files are a * subset of the ways they can create references to real files during * recording. Therefore the event during replay that drops the last * reference to the emulated F_0 must be a tracee unmapping of F_0. * * So as long as we GC emulated F_0 at the event of its fatal * unmapping, the lifetimes of emulated F_0 and emulated F_1 must be * disjoint. And F_0 being GC'd at that point is the important * assumption mentioned above. */ /** * A file within an EmuFs. The file is real, but it's mapped to the file * ID (device and inode) that was recorded in the trace. */ class EmuFile { public: typedef std::shared_ptr<EmuFile> shr_ptr; ~EmuFile(); /** * Return the fd of the real file backing this. */ const ScopedFd& fd() const { return file; } /** * Return a pathname referring to the fd of this in this * tracer's address space. For example, "/proc/12345/fd/5". */ std::string proc_path() const; /** * Return the path of the original file from recording, the * one this is emulating. */ const std::string emu_path() const { return orig_path; } const std::string real_path() const { return tmp_path; } dev_t device() const { return device_; } ino_t inode() const { return inode_; } private: friend class EmuFs; EmuFile(ScopedFd&& fd, const std::string& orig_path, const std::string& real_path, dev_t device, ino_t inode, uint64_t file_size); /** * Return a copy of this file, backed by a fresh shmem segment with * identical contents. */ shr_ptr clone(); /** * Mark/unmark/check to see if this file is marked.
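 * Marking is only used by EmuFs::gc()'s mark-and-sweep pass: the mark
 * phase flags every file still mapped by some tracee, the sweep phase
 * reclaims unmarked files, and all surviving files end up unmarked again.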
*/ void mark() { is_marked = true; } bool marked() const { return is_marked; } void unmark() { is_marked = false; } /** * Ensure that the emulated file is sized to match a later * stat() of it. */ void update(dev_t device, ino_t inode, uint64_t size); /** * Create a new emulated file for |orig_path| that will * emulate the recorded attributes |est|. |tag| is used to * uniquely identify this file among multiple EmuFs's that * might exist concurrently in this tracer process. */ static shr_ptr create(const std::string& orig_path, dev_t orig_device, ino_t orig_inode, uint64_t orig_file_size); std::string orig_path; std::string tmp_path; ScopedFd file; uint64_t size_; dev_t device_; ino_t inode_; bool is_marked; EmuFile(const EmuFile&) = delete; EmuFile operator=(const EmuFile&) = delete; }; class EmuFs { public: typedef std::shared_ptr shr_ptr; /** * Return the EmuFile for |recorded_map|, which must exist or this won't * return. */ EmuFile::shr_ptr at(const KernelMapping& recorded_map) const; bool has_file_for(const KernelMapping& recorded_map) const; /** * Return a copy of this fs such that |at()| and |get_or_create()| will * return semantically identical results as this, and such that mutations of * the returned fs won't affect this and vice versa. */ shr_ptr clone(); /** * Return an emulated file representing the recorded shared mapping * |recorded_km|. */ EmuFile::shr_ptr get_or_create(const KernelMapping& recorded_km, uint64_t file_size); /** * Dump information about this emufs to the "error" log. */ void log() const; size_t size() const { return files.size(); } /** Create and return a new emufs. */ static shr_ptr create(); /** * Collect emulated files that aren't referenced by tracees. * Call this only when a tracee's (possibly shared) file table * has been destroyed. All other gc triggers are handled * internally. */ void gc(const Session& session); private: EmuFs(); /** * Mark all the files being used by the tasks in |as|, and * increment |nt_marked_files| by the number of files that * were marked. */ void mark_used_vfiles(Task* t, const AddressSpace& as, size_t* nr_marked_files); struct FileId { FileId(const KernelMapping& recorded_map) : device(recorded_map.device()), inode(recorded_map.inode()) {} bool operator<(const FileId& other) const { return device < other.device || (device == other.device && inode < other.inode); } dev_t device; ino_t inode; }; typedef std::map FileMap; FileMap files; EmuFs(const EmuFs&) = delete; EmuFs& operator=(const EmuFs&) = delete; }; #endif // RR_EMUFS_H rr-4.1.0/src/Event.cc000066400000000000000000000176401265436462100143370ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "Event" #include "Event.h" #include #include #include #include "preload/preload_interface.h" #include "kernel_abi.h" #include "kernel_metadata.h" #include "log.h" using namespace rr; using namespace std; Event::Event(EncodedEvent e) { switch (event_type = e.type) { case EV_SEGV_RDTSC: case EV_EXIT: case EV_SCHED: case EV_SYSCALLBUF_FLUSH: case EV_SYSCALLBUF_ABORT_COMMIT: case EV_SYSCALLBUF_RESET: case EV_PATCH_SYSCALL: case EV_GROW_MAP: case EV_TRACE_TERMINATION: case EV_UNSTABLE_EXIT: case EV_INTERRUPTED_SYSCALL_NOT_RESTARTED: case EV_EXIT_SIGHANDLER: new (&Base()) BaseEvent(e.has_exec_info, e.arch()); // No auxiliary data. 
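// (Event::encode() stores 0 in the data field for all of these event // types, so a decoded event must carry 0 as well.)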
assert(0 == e.data); return; case EV_DESCHED: new (&Desched()) DeschedEvent(nullptr, e.arch()); return; case EV_SIGNAL: case EV_SIGNAL_DELIVERY: case EV_SIGNAL_HANDLER: new (&Signal()) SignalEvent( ~DET_SIGNAL_BIT & e.data, (DET_SIGNAL_BIT & e.data) ? DETERMINISTIC_SIG : NONDETERMINISTIC_SIG, e.arch()); return; case EV_SYSCALL: new (&Syscall()) SyscallEvent(e.data, e.arch()); Syscall().state = e.is_syscall_entry ? ENTERING_SYSCALL : EXITING_SYSCALL; return; default: FATAL() << "Unexpected event " << *this; } } Event::Event(const Event& o) : event_type(o.event_type) { switch (event_type) { case EV_DESCHED: new (&Desched()) DeschedEvent(o.Desched()); return; case EV_SIGNAL: case EV_SIGNAL_DELIVERY: case EV_SIGNAL_HANDLER: new (&Signal()) SignalEvent(o.Signal()); return; case EV_SYSCALL: case EV_SYSCALL_INTERRUPTION: new (&Syscall()) SyscallEvent(o.Syscall()); return; default: new (&Base()) BaseEvent(o.Base()); return; } } Event::~Event() { switch (event_type) { case EV_DESCHED: Desched().~DeschedEvent(); return; case EV_SIGNAL: case EV_SIGNAL_DELIVERY: case EV_SIGNAL_HANDLER: Signal().~SignalEvent(); return; case EV_SYSCALL: case EV_SYSCALL_INTERRUPTION: Syscall().~SyscallEvent(); return; default: Base().~BaseEvent(); return; } } Event& Event::operator=(const Event& o) { event_type = o.event_type; switch (event_type) { case EV_DESCHED: Desched().operator=(o.Desched()); break; case EV_SIGNAL: case EV_SIGNAL_DELIVERY: case EV_SIGNAL_HANDLER: Signal().operator=(o.Signal()); break; case EV_SYSCALL: case EV_SYSCALL_INTERRUPTION: Syscall().operator=(o.Syscall()); break; default: Base().operator=(o.Base()); break; } return *this; } static void set_encoded_event_data(EncodedEvent* e, int data) { e->data = data; // Ensure that e->data is wide enough for the data assert(e->data == data); } EncodedEvent Event::encode() const { EncodedEvent e; e.type = event_type; e.has_exec_info = has_exec_info(); e.arch_ = arch(); // Arbitrarily designate events for which this isn't // meaningful as being at "entry". The events for which this // is meaningful set it below. e.is_syscall_entry = true; switch (event_type) { case EV_SEGV_RDTSC: case EV_EXIT: case EV_SCHED: case EV_SYSCALLBUF_FLUSH: case EV_SYSCALLBUF_ABORT_COMMIT: case EV_SYSCALLBUF_RESET: case EV_PATCH_SYSCALL: case EV_GROW_MAP: case EV_TRACE_TERMINATION: case EV_UNSTABLE_EXIT: case EV_INTERRUPTED_SYSCALL_NOT_RESTARTED: case EV_EXIT_SIGHANDLER: // No auxiliary data. set_encoded_event_data(&e, 0); return e; case EV_SIGNAL: case EV_SIGNAL_DELIVERY: case EV_SIGNAL_HANDLER: { set_encoded_event_data( &e, Signal().siginfo.si_signo | (Signal().deterministic == DETERMINISTIC_SIG ? DET_SIGNAL_BIT : 0)); return e; } case EV_SYSCALL: { // PROCESSING_SYSCALL is a transient state that we // should never attempt to record. assert(Syscall().state != PROCESSING_SYSCALL); set_encoded_event_data( &e, Syscall().is_restart ? 
syscall_number_for_restart_syscall(e.arch_) : Syscall().number); e.is_syscall_entry = Syscall().state == ENTERING_SYSCALL; return e; } default: FATAL() << "Unknown event type " << event_type; return e; // not reached } } HasExecInfo Event::record_exec_info() const { return Base().has_exec_info; } bool Event::has_ticks_slop() const { switch (type()) { case EV_SYSCALLBUF_ABORT_COMMIT: case EV_SYSCALLBUF_FLUSH: case EV_SYSCALLBUF_RESET: case EV_DESCHED: case EV_GROW_MAP: return true; default: return false; } } bool Event::is_signal_event() const { switch (event_type) { case EV_SIGNAL: case EV_SIGNAL_DELIVERY: case EV_SIGNAL_HANDLER: return true; default: return false; } } bool Event::is_syscall_event() const { switch (event_type) { case EV_SYSCALL: case EV_SYSCALL_INTERRUPTION: return true; default: return false; } } void Event::log() const { LOG(info) << *this; } string Event::str() const { stringstream ss; ss << type_name(); switch (event_type) { case EV_SIGNAL: case EV_SIGNAL_DELIVERY: case EV_SIGNAL_HANDLER: ss << ": " << signal_name(Signal().siginfo.si_signo) << "(" << (const char*)(Signal().deterministic == DETERMINISTIC_SIG ? "det" : "async") << ")"; break; case EV_SYSCALL: case EV_SYSCALL_INTERRUPTION: ss << ": " << syscall_name(Syscall().number, Syscall().regs.arch()); break; default: // No auxiliary information. break; } return ss.str(); } void Event::transform(EventType new_type) { switch (event_type) { case EV_SIGNAL: assert(EV_SIGNAL_DELIVERY == new_type); break; case EV_SIGNAL_DELIVERY: assert(EV_SIGNAL_HANDLER == new_type); break; case EV_SYSCALL: assert(EV_SYSCALL_INTERRUPTION == new_type); break; case EV_SYSCALL_INTERRUPTION: assert(EV_SYSCALL == new_type); break; default: FATAL() << "Can't transform immutable " << *this << " into " << new_type; } event_type = new_type; } std::string Event::type_name() const { switch (event_type) { case EV_SENTINEL: return "(none)"; #define CASE(_t) \ case EV_##_t: \ return #_t CASE(EXIT); CASE(EXIT_SIGHANDLER); CASE(INTERRUPTED_SYSCALL_NOT_RESTARTED); CASE(NOOP); CASE(SCHED); CASE(SEGV_RDTSC); CASE(SYSCALLBUF_FLUSH); CASE(SYSCALLBUF_ABORT_COMMIT); CASE(SYSCALLBUF_RESET); CASE(PATCH_SYSCALL); CASE(GROW_MAP); CASE(UNSTABLE_EXIT); CASE(DESCHED); CASE(SIGNAL); CASE(SIGNAL_DELIVERY); CASE(SIGNAL_HANDLER); CASE(SYSCALL); CASE(SYSCALL_INTERRUPTION); CASE(TRACE_TERMINATION); #undef CASE default: FATAL() << "Unknown event type " << event_type; return nullptr; // not reached } } SignalEvent::SignalEvent(const siginfo_t& siginfo, SupportedArch arch) : BaseEvent(HAS_EXEC_INFO, arch), siginfo(siginfo), deterministic(is_deterministic_signal(siginfo)) {} const char* state_name(SyscallState state) { switch (state) { #define CASE(_id) \ case _id: \ return #_id CASE(NO_SYSCALL); CASE(ENTERING_SYSCALL); CASE(PROCESSING_SYSCALL); CASE(EXITING_SYSCALL); #undef CASE default: return "???state"; } } rr-4.1.0/src/Event.h000066400000000000000000000274701265436462100142030ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_EVENT_H_ #define RR_EVENT_H_ #include #include #include #include #include "kernel_abi.h" #include "Registers.h" /** * Events serve two purposes: tracking Task state during recording, and * being stored in traces to guide replay. Some events are only used during * recording and are never actually stored in traces (and are thus irrelevant * to replay). 
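 * (For example, EV_DESCHED below only guides the recorder and never
 * appears in a trace, whereas EV_SYSCALL is both tracked during recording
 * and stored in traces to drive replay.)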
*/ enum EventType { EV_UNASSIGNED, EV_SENTINEL, // TODO: this is actually a pseudo-pseudosignal: it will never // appear in a trace, but is only used to communicate between // different parts of the recorder code that should be // refactored to not have to do that. EV_NOOP, EV_DESCHED, // Events present in traces: // No associated data. EV_EXIT, // Tracee exited its sighandler. We leave this breadcrumb so // that the popping of not-restarted syscall interruptions and // sigreturns is replayed in the same order. EV_EXIT_SIGHANDLER, // Pretty self-explanatory: recording detected that an // interrupted syscall wasn't restarted, so the interruption // record can be popped off the tracee's event stack. EV_INTERRUPTED_SYSCALL_NOT_RESTARTED, // Scheduling signal interrupted the trace. EV_SCHED, EV_SEGV_RDTSC, // Recorded syscallbuf data for one or more buffered syscalls. EV_SYSCALLBUF_FLUSH, EV_SYSCALLBUF_ABORT_COMMIT, // The syscallbuf was reset to the empty state. We record this event // later than it really happens, because during replay we must proceed to // the event *after* a syscallbuf flush and then reset the syscallbuf, // to ensure we don't reset it while preload code is still using the data. EV_SYSCALLBUF_RESET, // Syscall was entered, the syscall instruction was patched, and the // syscall was aborted. Resume execution at the patch. EV_PATCH_SYSCALL, // Map memory pages due to a (future) memory access. This is associated // with a mmap entry for the new pages. EV_GROW_MAP, // The trace was terminated before all tasks exited, most // likely because the recorder was sent a terminating signal. // There are no more trace frames coming, so the best thing to // do is probably to shut down. EV_TRACE_TERMINATION, // Like USR_EXIT, but recorded when the task is in an // "unstable" state in which we're not sure we can // synchronously wait for it to "really finish". EV_UNSTABLE_EXIT, // Use .signal. EV_SIGNAL, EV_SIGNAL_DELIVERY, EV_SIGNAL_HANDLER, // Use .syscall. EV_SYSCALL, EV_SYSCALL_INTERRUPTION, EV_LAST }; enum HasExecInfo { NO_EXEC_INFO, HAS_EXEC_INFO }; /** * An encoding of the relevant bits of |struct event| that can be * cheaply and easily serialized. */ union EncodedEvent { struct { EventType type : 5; bool is_syscall_entry : 1; HasExecInfo has_exec_info : 1; SupportedArch arch_ : 1; int data : 24; }; int encoded; bool operator==(const EncodedEvent& other) const { return encoded == other.encoded; } bool operator!=(const EncodedEvent& other) const { return !(*this == other); } SupportedArch arch() const { return arch_; } }; static_assert(sizeof(int) == sizeof(EncodedEvent), "Bit fields are messed up"); static_assert(EV_LAST < (1 << 5), "Allocate more bits to the |type| field"); /** * Events are interesting occurrences during tracee execution which * are relevant for replay. Most events correspond to tracee * execution, but some (a subset of "pseudosigs") save actions that * the *recorder* took on behalf of the tracee. */ struct BaseEvent { /** * Pass |HAS_EXEC_INFO| if the event is at a stable execution * point that we'll reach during replay too. */ BaseEvent(HasExecInfo has_exec_info, SupportedArch arch) : has_exec_info(has_exec_info), arch_(arch) {} SupportedArch arch() const { return arch_; } // When replaying an event is expected to leave the tracee in // the same execution state as during replay, the event has // meaningful execution info, and it should be recorded for // checking. 
But some pseudosigs aren't recorded in the same // tracee state in which they'll be replayed, so the tracee execution // state isn't meaningful. HasExecInfo has_exec_info; SupportedArch arch_; }; /** * Desched events track the fact that a tracee's desched-event * notification fired during a may-block buffered syscall, which rr * interprets as the syscall actually blocking (for a potentially * unbounded amount of time). After the syscall exits, rr advances * the tracee to where the desched is "disarmed" by the tracee. */ struct DeschedEvent : public BaseEvent { /** Desched of |rec|. */ DeschedEvent(const struct syscallbuf_record* rec, SupportedArch arch) : BaseEvent(NO_EXEC_INFO, arch), rec(rec) {} // Record of the syscall that was interrupted by a desched // notification. It's legal to reference this memory /while // the desched is being processed only/, because |t| is in the // middle of a desched, which means it's successfully // allocated (but not yet committed) this syscall record. const struct syscallbuf_record* rec; }; /** * Signal events track signals through the delivery phase, and if the * signal finds a sighandler, on to the end of the handling phase. */ enum SignalDeterministic { NONDETERMINISTIC_SIG = 0, DETERMINISTIC_SIG = 1 }; struct SignalEvent : public BaseEvent { /** * Signal |signo| is the signum, and |deterministic| is true * for deterministically-delivered signals (see * record_signal.cc). */ SignalEvent(const siginfo_t& siginfo, SupportedArch arch); SignalEvent(int signo, SignalDeterministic deterministic, SupportedArch arch) : BaseEvent(HAS_EXEC_INFO, arch), deterministic(deterministic) { memset(&siginfo, 0, sizeof(siginfo)); siginfo.si_signo = signo; } /** * For SIGILL, SIGFPE, SIGSEGV, SIGBUS and SIGTRAP this is si_addr. * For other signals this is zero. */ uint64_t signal_data() const { switch (siginfo.si_signo) { case SIGILL: case SIGFPE: case SIGSEGV: case SIGBUS: case SIGTRAP: return (uint64_t)siginfo.si_addr; default: return 0; } } void set_signal_data(uint64_t data) { switch (siginfo.si_signo) { case SIGILL: case SIGFPE: case SIGSEGV: case SIGBUS: case SIGTRAP: siginfo.si_addr = (void*)data; break; } } // Signal info siginfo_t siginfo; // True if this signal will be deterministically raised as the // side effect of retiring an instruction during replay, for // example |load $r 0x0| deterministically raises SIGSEGV. SignalDeterministic deterministic; }; /** * Syscall events track syscalls through entry into the kernel, * processing in the kernel, and exit from the kernel. * * This also models interrupted syscalls. During recording, only * descheduled buffered syscalls /push/ syscall interruptions; all * others are detected at exit time and transformed into syscall * interruptions from the original, normal syscalls. * * During replay, we push interruptions to know when we need * to emulate syscall entry, since the kernel won't have set * things up for the tracee to restart on its own. */ enum SyscallState { NO_SYSCALL, ENTERING_SYSCALL, PROCESSING_SYSCALL, EXITING_SYSCALL }; struct SyscallEvent : public BaseEvent { /** Syscall |syscallno| is the syscall number. */ SyscallEvent(int syscallno, SupportedArch arch) : BaseEvent(HAS_EXEC_INFO, arch), regs(arch), desched_rec(nullptr), state(NO_SYSCALL), number(syscallno), is_restart(false) {} // The original (before scratch is set up) arguments to the // syscall passed by the tracee. These are used to detect // restarted syscalls.
Registers regs; // If this is a descheduled buffered syscall, points at the // record for that syscall. const struct syscallbuf_record* desched_rec; SyscallState state; // Syscall number. int number; // Nonzero when this syscall was restarted after a signal // interruption. bool is_restart; }; struct syscall_interruption_t {}; static const syscall_interruption_t interrupted; /** * Sum type for all events (well, a C++ approximation thereof). An * Event always has a defined EventType. It can be downcast to * one of the leaf types above iff the type tag is correct. */ struct Event { Event() : event_type(EV_UNASSIGNED) {} Event(EventType type, HasExecInfo info, SupportedArch arch) : event_type(type), base(info, arch) {} Event(const DeschedEvent& ev) : event_type(EV_DESCHED), desched(ev) {} Event(const SignalEvent& ev) : event_type(EV_SIGNAL), signal(ev) {} Event(const SyscallEvent& ev) : event_type(EV_SYSCALL), syscall(ev) {} Event(const syscall_interruption_t&, const SyscallEvent& ev) : event_type(EV_SYSCALL_INTERRUPTION), syscall(ev) {} /** * Re-construct this from an encoding created by * |Event::encode()|. */ Event(EncodedEvent e); Event(const Event& o); ~Event(); Event& operator=(const Event& o); // Events can always be cast to BaseEvent regardless of the // current concrete type, because all constituent types // inherit from BaseEvent. BaseEvent& Base() { return base; } const BaseEvent& Base() const { return base; } DeschedEvent& Desched() { assert(EV_DESCHED == event_type); return desched; } const DeschedEvent& Desched() const { assert(EV_DESCHED == event_type); return desched; } SignalEvent& Signal() { assert(is_signal_event()); return signal; } const SignalEvent& Signal() const { assert(is_signal_event()); return signal; } SyscallEvent& Syscall() { assert(is_syscall_event()); return syscall; } const SyscallEvent& Syscall() const { assert(is_syscall_event()); return syscall; } enum { // Deterministic signals are encoded as (signum | DET_SIGNAL_BIT). DET_SIGNAL_BIT = 0x80 }; /** * Return an encoding of this event that can be cheaply * serialized. The encoding is lossy. */ EncodedEvent encode() const; /** * Return true if a tracee at this event has meaningful * execution info (registers etc.) that rr should record. * "Meaningful" means that the same state will be seen when * reaching this event during replay. */ HasExecInfo record_exec_info() const; HasExecInfo has_exec_info() const { return base.has_exec_info; } bool has_ticks_slop() const; /** * Return true if this is one of the indicated types of events. */ bool is_signal_event() const; bool is_syscall_event() const; /** * Dump info about this to INFO log. * * Note: usually you want to use |LOG(info) << event;|. */ void log() const; /** Return a string describing this. */ std::string str() const; /** * Dynamically change the type of this. Only a small number * of type changes are allowed. */ void transform(EventType new_type); /** Return the current type of this. */ EventType type() const { return event_type; } /** Return the architecture associated with this. */ SupportedArch arch() const { return base.arch(); } /** Change the architecture for this event. */ void set_arch(SupportedArch a) { base.arch_ = a; } /** Return a string naming |ev|'s type. */ std::string type_name() const; /** Return an event of type EV_NOOP.
*/ static Event noop(SupportedArch arch) { return Event(EV_NOOP, NO_EXEC_INFO, arch); } private: EventType event_type; union { BaseEvent base; DeschedEvent desched; SignalEvent signal; SyscallEvent syscall; }; }; inline static std::ostream& operator<<(std::ostream& o, const Event& ev) { return o << ev.str(); } inline static std::ostream& operator<<(std::ostream& o, const EncodedEvent& ev) { return o << Event(ev); } const char* state_name(SyscallState state); #endif // EVENT_H_ rr-4.1.0/src/ExtraRegisters.cc000066400000000000000000000133351265436462100162260ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "registers" #include "ExtraRegisters.h" #include #include #include "log.h" #include "util.h" using namespace rr; using namespace std; // This is the byte offset at which the ST0-7 register data begins // with an xsave (or fxsave) block. static const int st_regs_offset = 32; // NB: each STx register holds 10 bytes of actual data, but each // occupies 16 bytes of space within (f)xsave, presumably for // alignment purposes. static const int st_reg_space = 16; // Byte offset at which the XMM0-15 register data begins with (f)xsave. static const int xmm_regs_offset = 160; static const int xmm_reg_space = 16; static const uint8_t fxsave_387_ctrl_offsets[] = { // The Intel documentation says that the following layout is only valid in // 32-bit mode, or when fxsave is executed in 64-bit mode without an // appropriate REX prefix. The kernel seems to only use fxsave with the // REX prefix, so one would think these offsets would be different. But // GDB seems happy to use these offsets, so that's what we use too. 0, // DREG_64_FCTRL 2, // DREG_64_FSTAT 4, // DREG_64_FTAG 12, // DREG_64_FISEG 8, // DREG_64_FIOFF 20, // DREG_64_FOSEG 16, // DREG_64_FOOFF 6, // DREG_64_FOP }; struct RegData { int offset; int size; RegData(int offset = -1, int size = 0) : offset(offset), size(size) {} }; static bool reg_in_range(GdbRegister regno, GdbRegister low, GdbRegister high, int offset_base, int offset_stride, int size, RegData* out) { if (regno < low || regno > high) { return false; } out->offset = offset_base + offset_stride * (regno - low); out->size = size; return true; } // Return the size and data location of register |regno|. // If we can't read the register, returns -1 in 'offset'. static RegData xsave_register_data(SupportedArch arch, GdbRegister regno) { // Check regno is in range, and if it's 32-bit then convert it to the // equivalent 64-bit register. switch (arch) { case x86: if (regno >= DREG_YMM0H && regno <= DREG_YMM7H) { regno = (GdbRegister)(regno - DREG_YMM0H + DREG_64_YMM0H); break; } if (regno < DREG_FIRST_FXSAVE_REG || regno > DREG_LAST_FXSAVE_REG) { return RegData(); } if (regno == DREG_MXCSR) { regno = DREG_64_MXCSR; } else { regno = (GdbRegister)(regno - DREG_FIRST_FXSAVE_REG + DREG_64_FIRST_FXSAVE_REG); } break; case x86_64: if (regno < DREG_64_FIRST_FXSAVE_REG || regno > DREG_64_LAST_FXSAVE_REG) { return RegData(); } break; default: assert(0 && "Unknown arch"); return RegData(); } RegData result; if (reg_in_range(regno, DREG_64_ST0, DREG_64_ST7, st_regs_offset, st_reg_space, 10, &result)) { return result; } if (reg_in_range(regno, DREG_64_XMM0, DREG_64_XMM15, xmm_regs_offset, xmm_reg_space, 16, &result)) { return result; } // TODO: support AVX registers properly. Right now we always return a location // of -1. 
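// (The -1 offset propagates out of xsave_register_data() and makes // read_register() report the register as undefined rather than returning // bogus data.)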
if (reg_in_range(regno, DREG_64_YMM0H, DREG_64_YMM15H, -1, 0, 16, &result)) { return result; } if (regno == DREG_64_MXCSR) { return RegData(24, 4); } assert(regno >= DREG_64_FCTRL && regno <= DREG_64_FOP); // NB: most of these registers only occupy 2 bytes of space in // the (f)xsave region, but gdb's default x86 target // config expects us to send back 4 bytes of data for // each. return RegData(fxsave_387_ctrl_offsets[regno - DREG_64_FCTRL], 4); } size_t ExtraRegisters::read_register(uint8_t* buf, GdbRegister regno, bool* defined) const { if (format_ != XSAVE) { *defined = false; return 0; } auto reg_data = xsave_register_data(arch(), regno); if (reg_data.offset < 0 || empty()) { *defined = false; return reg_data.size; } assert(reg_data.size > 0); *defined = true; memcpy(buf, data.data() + reg_data.offset, reg_data.size); return reg_data.size; } static X86Arch::user_fpregs_struct convert_fxsave_to_x86_fpregs( const X86Arch::user_fpxregs_struct& buf) { X86Arch::user_fpregs_struct result; for (int i = 0; i < 8; ++i) { memcpy(reinterpret_cast(result.st_space) + i * 10, &buf.st_space[i * 4], 10); } result.cwd = buf.cwd | 0xffff0000; result.swd = buf.swd | 0xffff0000; // XXX Computing the correct twd is a pain. It probably doesn't matter to us // in practice. result.twd = 0; result.fip = buf.fip; result.fcs = buf.fcs; result.foo = buf.foo; result.fos = buf.fos; return result; } template static vector to_vector(const T& v) { vector result; result.resize(sizeof(T)); memcpy(result.data(), &v, sizeof(T)); return result; } vector ExtraRegisters::get_user_fpregs_struct( SupportedArch arch) const { assert(format_ == XSAVE); switch (arch) { case x86: assert(data.size() >= sizeof(X86Arch::user_fpxregs_struct)); return to_vector(convert_fxsave_to_x86_fpregs( *reinterpret_cast(data.data()))); case x86_64: assert(data.size() >= sizeof(X64Arch::user_fpregs_struct)); return to_vector( *reinterpret_cast(data.data())); default: assert(0 && "Unknown arch"); return vector(); } } X86Arch::user_fpxregs_struct ExtraRegisters::get_user_fpxregs_struct() const { assert(format_ == XSAVE); assert(data.size() >= sizeof(X86Arch::user_fpxregs_struct)); return *reinterpret_cast(data.data()); } rr-4.1.0/src/ExtraRegisters.h000066400000000000000000000055301265436462100160660ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_EXTRA_REGISTERS_H_ #define RR_EXTRA_REGISTERS_H_ #include #include #include #include "GdbRegister.h" #include "kernel_abi.h" /** * An ExtraRegisters object contains values for all user-space-visible * registers other than those in Registers. * * Task is responsible for creating meaningful values of this class. * * The only reason this class has an arch() is to enable us to * interpret GdbRegister. */ class ExtraRegisters { public: // Create empty (uninitialized/unknown registers) value ExtraRegisters(SupportedArch arch = SupportedArch(-1)) : format_(NONE), arch_(arch) {} /** * On a x86 64-bit kernel, these structures are initialized by an XSAVE64 or * FXSAVE64. * On a x86 32-bit kernel, they are initialized by an XSAVE or FXSAVE. * * The layouts are basically the same in the first 512 bytes --- an * FXSAVE(64) area. The differences are: * -- On a 64-bit kernel, registers XMM8-XMM15 are saved, but on a 32-bit * kernel they are not (that space is reserved). * -- On a 64-bit kernel, bytes 8-15 store a 64-bit "FPU IP" address, * but on a 32-bit kernel they store "FPU IP/CS". Likewise, * bytes 16-23 store "FPU DP" or "FPU DP/DS". 
* We basically ignore these differences. If gdb requests 32-bit-specific * registers, we return them, assuming that the data there is valid. * * XSAVE/XSAVE64 have extra information after the first 512 bytes, which we * currently save and restore but do not otherwise use. If the data record * has more than 512 bytes then it's an XSAVE(64) area, otherwise it's just * the FXSAVE(64) area. */ enum Format { NONE, XSAVE }; // Set values from raw data void set_to_raw_data(Format format, std::vector& consume_data) { format_ = format; std::swap(data, consume_data); } void set_arch(SupportedArch a) { arch_ = a; } Format format() const { return format_; } SupportedArch arch() const { return arch_; } int data_size() const { return data.size(); } const uint8_t* data_bytes() const { return data.data(); } bool empty() const { return data.empty(); } /** * Like |Registers::read_register()|, except attempts to read * the value of an "extra register" (floating point / vector). */ size_t read_register(uint8_t* buf, GdbRegister regno, bool* defined) const; /** * Get a user_fpregs_struct for a particular Arch from these ExtraRegisters. */ std::vector get_user_fpregs_struct(SupportedArch arch) const; /** * Get a user_fpxregs_struct for from these ExtraRegisters. */ rr::X86Arch::user_fpxregs_struct get_user_fpxregs_struct() const; private: friend class Task; Format format_; SupportedArch arch_; std::vector data; }; #endif /* RR_EXTRA_REGISTERS_H_ */ rr-4.1.0/src/FdTable.cc000066400000000000000000000067671265436462100145670ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "FdTable.h" #include #include #include "rr/rr.h" #include "log.h" #include "Session.h" #include "task.h" using namespace std; void FdTable::add_monitor(int fd, FileMonitor* monitor) { // In the future we could support multiple monitors on an fd, but we don't // need to yet. assert(!is_monitoring(fd)); fds[fd] = FileMonitor::shr_ptr(monitor); update_syscallbuf_fds_disabled(fd); } bool FdTable::allow_close(int fd) { auto it = fds.find(fd); if (it == fds.end()) { return true; } return it->second->allow_close(); } Switchable FdTable::will_write(Task* t, int fd) { auto it = fds.find(fd); if (it == fds.end()) { return ALLOW_SWITCH; } return it->second->will_write(t); } void FdTable::did_write(Task* t, int fd, const std::vector& ranges) { auto it = fds.find(fd); if (it != fds.end()) { it->second->did_write(t, ranges); } } void FdTable::did_dup(int from, int to) { if (fds.count(from)) { fds[to] = fds[from]; } else { fds.erase(to); } update_syscallbuf_fds_disabled(to); } void FdTable::did_close(int fd) { fds.erase(fd); update_syscallbuf_fds_disabled(fd); } static bool is_fd_monitored_in_any_task(AddressSpace* vm, int fd) { for (Task* t : vm->task_set()) { if (t->fd_table()->is_monitoring(fd)) { return true; } } return false; } void FdTable::update_syscallbuf_fds_disabled(int fd) { assert(fd >= 0); assert(task_set().size() > 0); unordered_set vms_updated; // It's possible for tasks with different VMs to share this fd table. // But tasks with the same VM might have different fd tables... 
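// Below we dedupe by address space: syscallbuf_fds_disabled only needs to // be updated once per VM, via any one task that runs in it.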
for (Task* t : task_set()) { AddressSpace* vm = t->vm().get(); if (vms_updated.find(vm) != vms_updated.end()) { continue; } vms_updated.insert(vm); if (!t->syscallbuf_fds_disabled_child.is_null() && fd < SYSCALLBUF_FDS_DISABLED_SIZE) { bool disable = is_fd_monitored_in_any_task(vm, fd); t->write_mem(t->syscallbuf_fds_disabled_child + fd, (char)disable); } } } void FdTable::init_syscallbuf_fds_disabled(Task* t) { ASSERT(t, has_task(t)); if (t->syscallbuf_fds_disabled_child.is_null()) { return; } char disabled[SYSCALLBUF_FDS_DISABLED_SIZE]; memset(disabled, 0, sizeof(disabled)); // It's possible that some tasks in this address space have a different // FdTable. We need to disable syscallbuf for an fd if any tasks for this // address space are monitoring the fd. for (Task* vm_t : t->vm()->task_set()) { for (auto& it : vm_t->fd_table()->fds) { int fd = it.first; assert(fd >= 0); if (fd < SYSCALLBUF_FDS_DISABLED_SIZE) { disabled[fd] = 1; } } } t->write_mem(t->syscallbuf_fds_disabled_child, disabled, SYSCALLBUF_FDS_DISABLED_SIZE); } static bool is_fd_open(Task* t, int fd) { char path[PATH_MAX]; sprintf(path, "/proc/%d/fd/%d", t->tid, fd); struct stat st; return 0 == lstat(path, &st); } void FdTable::update_for_cloexec(Task* t, TraceTaskEvent& event) { ASSERT(t, has_task(t)); vector fds_to_close; if (t->session().is_recording()) { for (auto& it : fds) { if (!is_fd_open(t, it.first)) { fds_to_close.push_back(it.first); } } event.set_fds_to_close(fds_to_close); } else { fds_to_close = event.fds_to_close(); } for (auto fd : fds_to_close) { did_close(fd); } } rr-4.1.0/src/FdTable.h000066400000000000000000000032151265436462100144120ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_FD_TABLE_H_ #define RR_FD_TABLE_H_ #include #include #include "AddressSpace.h" #include "FileMonitor.h" class TraceTaskEvent; class FdTable : public HasTaskSet { public: typedef std::shared_ptr shr_ptr; void add_monitor(int fd, FileMonitor* monitor); bool allow_close(int fd); Switchable will_write(Task* t, int fd); void did_write(Task* t, int fd, const std::vector& ranges); void did_dup(int from, int to); void did_close(int fd); shr_ptr clone(Task* t) { shr_ptr fds(new FdTable(*this)); fds->insert_task(t); return fds; } static shr_ptr create(Task* t) { shr_ptr fds(new FdTable()); fds->insert_task(t); return fds; } bool is_monitoring(int fd) { return fds.count(fd) > 0; } /** * Regenerate syscallbuf_fds_disabled in task |t|. * Called during initialization of the preload library. */ void init_syscallbuf_fds_disabled(Task* t); /** * Called after task |t| for this FdTable has execed. Update for any fds * that were closed via CLOEXEC. * Rather than tracking CLOEXEC flags (which would be complicated), we just * scan /proc//fd during recording and note any monitored fds that have * been closed, and record these in the TraceTaskEvent. 
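 * During replay the recorded list is read back from the TraceTaskEvent and
 * the same fds are closed, keeping record and replay in sync.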
*/ void update_for_cloexec(Task* t, TraceTaskEvent& event); private: FdTable() {} FdTable(const FdTable& other) : fds(other.fds) {} void update_syscallbuf_fds_disabled(int fd); std::unordered_map fds; }; #endif /* RR_FD_TABLE_H_ */ rr-4.1.0/src/FileMonitor.h000066400000000000000000000031621265436462100153410ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_FILE_MONITOR_H_ #define RR_FILE_MONITOR_H_ class Task; #include #include #include #include #include "util.h" class FileMonitor { public: typedef std::shared_ptr shr_ptr; virtual ~FileMonitor() {} /** * Overriding this to return false will cause close() (and related fd-smashing * operations such as dup2) to return EBADF. */ virtual bool allow_close() { return true; } /** * Notification that task |t| is about to write |data| bytes of length * |length| to the file. * In general writes can block, and concurrent blocking writes to the same * file may race so that the kernel performs writes out of order * with respect to will_write notifications. * If it is known that the write cannot block (or that blocking all of rr * on it is OK), this notification can return PREVENT_SWITCH to make the * write a blocking write. This ensures that writes are performed in the order * of will_write notifications. */ virtual Switchable will_write(Task* t) { return ALLOW_SWITCH; } /** * Notification that task |t| wrote to the file descriptor. * Due to races, if will_write did not return PREVENT_SWITCH, it's possible * that the data in the buffers is not what was actually written. */ struct Range { remote_ptr data; size_t length; Range(remote_ptr data, size_t length) : data(data), length(length) {} }; virtual void did_write(Task* t, const std::vector& ranges) {} }; #endif /* RR_FILE_MONITOR_H_ */ rr-4.1.0/src/Flags.cc000066400000000000000000000003071265436462100143020ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "Flags.h" #include Flags& Flags::get_for_init() { return singleton; } Flags Flags::singleton; rr-4.1.0/src/Flags.h000066400000000000000000000041551265436462100141510ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_FLAGS_H_ #define RR_FLAGS_H_ #include #include #include #include "Ticks.h" #include "TraceFrame.h" /** * Command line arguments for rr */ struct Flags { enum { CHECKSUM_NONE = -3, CHECKSUM_SYSCALL = -2, CHECKSUM_ALL = -1 }; /* When to generate or check memory checksums. One of CHECKSUM_NONE, * CHECKSUM_SYSCALL or CHECKSUM_ALL, or a positive integer representing the * event time at which to start checksumming. */ int checksum; enum { DUMP_ON_ALL = 10000, DUMP_ON_NONE = -DUMP_ON_ALL }; /* event(s) to create memory dumps for */ TraceFrame::Time dump_on; // event enum { DUMP_AT_NONE = -1 }; /* time at which to create memory dump */ int dump_at; // global time /* True when not-absolutely-urgently-critical messages will be * logged. */ bool verbose; // Force rr to do some things that it otherwise wouldn't, for // example launching an emergency debugger when the output // doesn't seem to be a tty. bool force_things; /* Mark the trace global time along with tracee writes to * stdio. */ bool mark_stdio; // Check that cached mmaps match /proc/maps after each event. bool check_cached_mmaps; // Suppress warnings related to environmental features outside rr's // control. 
bool suppress_environment_warnings; // Any warning or error that would be printed is treated as fatal bool fatal_errors_and_warnings; // User override for architecture detection, e.g. when running // under valgrind. std::string forced_uarch; Flags() : checksum(CHECKSUM_NONE), dump_on(DUMP_ON_NONE), dump_at(DUMP_AT_NONE), verbose(false), force_things(false), mark_stdio(false), check_cached_mmaps(false), suppress_environment_warnings(false) {} static const Flags& get() { return singleton; } /** * Get a reference that can be used to initialize the global Flags. * Can only be called once. */ static Flags& get_for_init(); private: static Flags singleton; }; #endif /* RR_FLAGS_H_ */ rr-4.1.0/src/GdbCommand.cc000066400000000000000000000007461265436462100152500ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "GdbCommand" #include "GdbCommand.h" RR_CMD("when") { return std::string() + "Current event: " + std::to_string(t->current_trace_frame().time()) + "\n"; } RR_CMD("when-ticks") { return std::string() + "Current tick: " + std::to_string(t->tick_count()) + "\n"; } RR_CMD("when-tid") { return std::string() + "Current tid: " + std::to_string(t->tid) + "\n"; } rr-4.1.0/src/GdbCommand.h000066400000000000000000000041041265436462100151020ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_GDB_COMMAND_H_ #define RR_GDB_COMMAND_H_ #include "GdbServer.h" #include "GdbCommandHandler.h" #include #include class GdbCommand { protected: GdbCommand(const std::string& cmd_name) : cmd_name(cmd_name) { GdbCommandHandler::register_command(*this); } public: const std::string& name() const { return cmd_name; } /** * Handle the RR Cmd and return a string response to be echo'd * to the user. * * NOTE: args[0] is the command name */ virtual std::string invoke(GdbServer& gdb_server, Task* t, const std::vector& args) = 0; private: const std::string cmd_name; }; #define RR_LINE_CONCAT(str, line) str##line #define RR_LINE_EXPAND(str, line) RR_LINE_CONCAT(str, line) #define RR_CMD_CLASSNAME() RR_LINE_EXPAND(RRCmd, __LINE__) #define RR_CMD_OBJ() RR_LINE_EXPAND(sRRCmdObj, __LINE__) #define RR_CMD(name) \ class RR_CMD_CLASSNAME() : public GdbCommand { \ public: \ RR_CMD_CLASSNAME()() : GdbCommand(name) {} \ \ private: \ virtual std::string invoke(GdbServer& gdb_server, Task* t, \ const std::vector& args); \ }; \ \ static RR_CMD_CLASSNAME() RR_CMD_OBJ(); \ \ std::string RR_CMD_CLASSNAME()::invoke(GdbServer& gdb_server, Task* t, \ const std::vector& args) #endif rr-4.1.0/src/GdbCommandHandler.cc000066400000000000000000000070421265436462100165420ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "GdbCommandHandler" #include "GdbCommand.h" #include "GdbCommandHandler.h" #include "log.h" #include #include // HashMap would be better here but the unordered_map API is annoying // and linear search is fine. 
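/* Commands self-register during static initialization: constructing a
 * GdbCommand (see GdbCommand.h) calls register_command(), which appends the
 * instance to gdb_command_list below. So a hypothetical new command needs
 * only, e.g.:
 *
 *   RR_CMD("my-command") {
 *     return std::string("hello from rr\n");
 *   }
 *
 * gdb_macros() below then emits its python binding automatically. */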
static std::vector<GdbCommand*>* gdb_command_list; static std::string gdb_macro_binding(const GdbCommand& cmd) { return "python RRCmd('" + cmd.name() + "')\n"; } /* static */ std::string GdbCommandHandler::gdb_macros() { std::stringstream ss; ss << std::string(R"Delimiter( set python print-stack full python import re def gdb_unescape(string): result = "" pos = 0 while pos < len(string): result += chr(int(string[pos:pos+2], 16)) pos += 2 return result def gdb_escape(string): result = "" pos = 0 for curr_char in string: result += format(ord(curr_char), '02x') return result class RRCmd(gdb.Command): def __init__(self, name): gdb.Command.__init__(self, name, gdb.COMMAND_USER, gdb.COMPLETE_NONE, True) self.cmd_name = name def invoke(self, arg, from_tty): args = gdb.string_to_argv(arg) self.rr_cmd(args) def rr_cmd(self, args): rv = gdb.execute("maint packet qRRCmd:" + gdb_escape(self.cmd_name), to_string=True); rv_match = re.search('received: "(.*)"', rv, re.MULTILINE); if not rv_match: gdb.write("Response error: " + rv) return response = gdb_unescape(rv_match.group(1)) gdb.write(response) end )Delimiter"); if (gdb_command_list) { for (auto& it : *gdb_command_list) { ss << gdb_macro_binding(*it); } } return ss.str(); } static GdbCommand* command_for_name(const std::string& name) { if (!gdb_command_list) { return nullptr; } for (auto& it : *gdb_command_list) { if (it->name() == name) { return it; } } return nullptr; } void GdbCommandHandler::register_command(GdbCommand& cmd) { LOG(debug) << "registering command: " << cmd.name(); if (!gdb_command_list) { gdb_command_list = new std::vector<GdbCommand*>(); } gdb_command_list->push_back(&cmd); } // Use the simplest encoding: two hex characters per byte. static std::string gdb_escape(const std::string& str) { std::stringstream ss; ss << std::hex; for (size_t i = 0; i < str.size(); i++) { unsigned char chr = str.at(i); if (chr < 0x10) { // Zero-pad so every byte is exactly two hex chars, as gdb_unescape // (both here and in the python glue above) expects. ss << '0'; } ss << (int)chr; } return ss.str(); } static std::string gdb_unescape(const std::string& str) { std::stringstream ss; for (size_t i = 0; i < str.size(); i += 2) { ss << (char)std::stoul(str.substr(i, 2), nullptr, 16); } return ss.str(); } static std::vector<std::string> parse_cmd(std::string& str) { std::vector<std::string> args; size_t pos = 0; std::string delimiter = ":"; while ((pos = str.find(delimiter)) != std::string::npos) { args.push_back(gdb_unescape(str.substr(0, pos))); str.erase(0, pos + delimiter.length()); } args.push_back(gdb_unescape(str)); return args; } /* static */ std::string GdbCommandHandler::process_command( GdbServer& gdb_server, Task* t, std::string payload) { const std::vector<std::string> args = parse_cmd(payload); GdbCommand* cmd = command_for_name(args[0]); if (!cmd) { return gdb_escape(std::string() + "Command '" + args[0] + "' not found.\n"); } LOG(debug) << "invoking command: " << cmd->name(); std::string resp = cmd->invoke(gdb_server, t, args); LOG(debug) << "cmd response: " << resp; return gdb_escape(resp); } rr-4.1.0/src/GdbCommandHandler.h000066400000000000000000000014171265436462100164040ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_GDB_COMMAND_HANDLER_H_ #define RR_GDB_COMMAND_HANDLER_H_ #include <string> class GdbCommand; class GdbServer; class Task; class GdbCommandHandler { public: // Declare any registered command with supporting // wrapper code. static std::string gdb_macros(); static void register_command(GdbCommand& cmd); /** * Process an incoming GDB payload of the following form: * <command-name>:<arg1>:<arg2>:... * * NOTE: rr commands are typically sent with the "qRRCmd:" prefix, which * should have been stripped already.
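 * For example, the "when" command's name arrives hex-encoded as
 * "7768656e"; parse_cmd()/gdb_unescape() decode it back to {"when"}.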
*/ static std::string process_command(GdbServer& gdb_server, Task* t, std::string payload); private: }; #endif rr-4.1.0/src/GdbConnection.cc000066400000000000000000001253301265436462100157660ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "GdbConnection" #define REVERSE_EXECUTION /** * Much of this implementation is based on the documentation at * * http://sourceware.org/gdb/onlinedocs/gdb/Packets.html */ #include "GdbConnection.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "log.h" #include "GdbCommandHandler.h" #include "ReplaySession.h" #include "ScopedFd.h" #include "StringVectorToCharArray.h" static const char INTERRUPT_CHAR = '\x03'; #ifdef DEBUGTAG #define UNHANDLED_REQ() FATAL() #else #define UNHANDLED_REQ() \ write_packet(""); \ LOG(info) #endif using namespace std; const GdbThreadId GdbThreadId::ANY(0, 0); const GdbThreadId GdbThreadId::ALL(-1, -1); static bool request_needs_immediate_response(const GdbRequest* req) { switch (req->type) { case DREQ_NONE: case DREQ_CONT: return false; default: return true; } } GdbConnection::GdbConnection(pid_t tgid, const Features& features) : tgid(tgid), no_ack(false), inlen(0), outlen(0), features_(features) { #ifndef REVERSE_EXECUTION features_.reverse_execution = false; #endif } static ScopedFd open_socket(const char* address, unsigned short* port, GdbConnection::ProbePort probe) { ScopedFd listen_fd(socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0)); if (!listen_fd.is_open()) { FATAL() << "Couldn't create socket"; } struct sockaddr_in addr; addr.sin_family = AF_INET; addr.sin_addr.s_addr = inet_addr(address); int reuseaddr = 1; int ret = setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, sizeof(reuseaddr)); if (ret < 0) { FATAL() << "Couldn't set SO_REUSEADDR"; } do { addr.sin_port = htons(*port); ret = ::bind(listen_fd, (struct sockaddr*)&addr, sizeof(addr)); if (ret && (EADDRINUSE == errno || EACCES == errno || EINVAL == errno)) { continue; } if (ret) { FATAL() << "Couldn't bind to port " << *port; } ret = listen(listen_fd, 1 /*backlogged connection*/); if (ret && EADDRINUSE == errno) { continue; } if (ret) { FATAL() << "Couldn't listen on port " << *port; } break; } while (++(*port), probe == GdbConnection::PROBE_PORT); return listen_fd; } void GdbConnection::await_debugger(ScopedFd& listen_fd) { struct sockaddr_in client_addr; socklen_t len = sizeof(client_addr); sock_fd = ScopedFd(accept(listen_fd, (struct sockaddr*)&client_addr, &len)); // We might restart this debugging session, so don't set the // socket fd CLOEXEC. 
} static const char connection_addr[] = "127.0.0.1"; struct DebuggerParams { char exe_image[PATH_MAX]; short port; }; unique_ptr GdbConnection::await_client_connection( unsigned short desired_port, ProbePort probe, pid_t tgid, const string& exe_image, const Features& features, ScopedFd* client_params_fd) { auto dbg = unique_ptr(new GdbConnection(tgid, features)); unsigned short port = desired_port; ScopedFd listen_fd = open_socket(connection_addr, &port, probe); if (client_params_fd) { DebuggerParams params; memset(¶ms, 0, sizeof(params)); strncpy(params.exe_image, exe_image.c_str(), sizeof(params.exe_image) - 1); params.port = port; ssize_t nwritten = write(*client_params_fd, ¶ms, sizeof(params)); assert(nwritten == sizeof(params)); } else { fprintf(stderr, "Launch gdb with\n" " gdb %s\n" "and attach to the rr debug server with:\n" " target remote :%d\n", exe_image.c_str(), port); } LOG(debug) << "limiting debugger traffic to tgid " << tgid; dbg->await_debugger(listen_fd); return dbg; } static string create_gdb_command_file(const string& macros) { char tmp[] = "/tmp/rr-gdb-commands-XXXXXX"; // This fd is just leaked. That's fine since we only call this once // per rr invocation at the moment. int fd = mkstemp(tmp); unlink(tmp); ssize_t len = macros.size(); int written = write(fd, macros.c_str(), len); if (written != len) { FATAL() << "Failed to write gdb command file"; } stringstream procfile; procfile << "/proc/" << getpid() << "/fd/" << fd; return procfile.str(); } void GdbConnection::launch_gdb(ScopedFd& params_pipe_fd, const string& macros, const string& gdb_command_file_path, const string& gdb_binary_file_path) { DebuggerParams params; ssize_t nread; while (true) { nread = read(params_pipe_fd, ¶ms, sizeof(params)); if (nread == 0) { // pipe was closed. Probably rr failed/died. return; } if (nread != -1 || errno != EINTR) { break; } } assert(nread == sizeof(params)); stringstream attach_cmd; attach_cmd << "target extended-remote " << connection_addr << ":" << params.port; LOG(debug) << "launching " << gdb_binary_file_path << " with command '" << attach_cmd.str() << "'"; vector args; args.push_back(gdb_binary_file_path); // The gdb protocol uses the "vRun" packet to reload // remote targets. The packet is specified to be like // "vCont", in which gdb waits infinitely long for a // stop reply packet. But in practice, gdb client // expects the vRun to complete within the remote-reply // timeout, after which it issues vCont. The timeout // causes gdb<-->rr communication to go haywire. // // rr can take a very long time indeed to send the // stop-reply to gdb after restarting replay; the time // to reach a specified execution target is // theoretically unbounded. Timing out on vRun is // technically a gdb bug, but because the rr replay and // the gdb reload models don't quite match up, we'll // work around it on the rr side by disabling the // remote-reply timeout. args.push_back("-l"); args.push_back("-1"); args.push_back(params.exe_image); if (gdb_command_file_path.length() > 0) { args.push_back("-x"); args.push_back(gdb_command_file_path); } if (macros.size()) { string gdb_command_file = create_gdb_command_file(macros); args.push_back("-x"); args.push_back(gdb_command_file); } args.push_back("-ex"); args.push_back(attach_cmd.str()); StringVectorToCharArray c_args(args); execvp(gdb_binary_file_path.c_str(), c_args.get()); FATAL() << "Failed to exec gdb."; } /** * Poll for data to or from gdb, waiting |timeoutMs|. 0 means "don't * wait", and -1 means "wait forever". 
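 * (For example, sniff_packet() below passes 0 to check for already-buffered
 * gdb input without blocking, while read_data_once() passes -1 to wait for
 * it.)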
Return true if data is ready. */ static bool poll_socket(const ScopedFd& sock_fd, short events, int timeoutMs) { struct pollfd pfd; memset(&pfd, 0, sizeof(pfd)); pfd.fd = sock_fd; pfd.events = events; int ret = poll(&pfd, 1, timeoutMs); if (ret < 0 && errno != EINTR) { FATAL() << "Polling gdb socket failed"; } return ret > 0; } static bool poll_incoming(const ScopedFd& sock_fd, int timeoutMs) { return poll_socket(sock_fd, POLLIN /* TODO: |POLLERR */, timeoutMs); } static void poll_outgoing(const ScopedFd& sock_fd, int timeoutMs) { poll_socket(sock_fd, POLLOUT /* TODO: |POLLERR */, timeoutMs); } /** * read() incoming data exactly one time, successfully. May block. */ void GdbConnection::read_data_once() { ssize_t nread; /* Wait until there's data, instead of busy-looping on * EAGAIN. */ poll_incoming(sock_fd, -1 /* wait forever */); nread = read(sock_fd, inbuf + inlen, sizeof(inbuf) - inlen); if (0 == nread) { LOG(info) << "(gdb closed debugging socket, exiting)"; exit(0); } if (nread <= 0) { FATAL() << "Error reading from gdb"; } inlen += nread; assert("Impl dynamic alloc if this fails (or double inbuf size)" && inlen < int(sizeof(inbuf))); } void GdbConnection::write_flush() { ssize_t write_index = 0; #ifdef DEBUGTAG outbuf[outlen] = '\0'; LOG(debug) << "write_flush: '" << outbuf << "'"; #endif while (write_index < outlen) { ssize_t nwritten; poll_outgoing(sock_fd, -1 /*wait forever*/); nwritten = write(sock_fd, outbuf + write_index, outlen - write_index); if (nwritten < 0) { FATAL() << "Error writing to gdb"; } write_index += nwritten; } outlen = 0; } void GdbConnection::write_data_raw(const uint8_t* data, ssize_t len) { assert("Impl dynamic alloc if this fails (or double outbuf size)" && (outlen + len) < int(sizeof(inbuf))); memcpy(outbuf + outlen, data, len); outlen += len; } void GdbConnection::write_hex(unsigned long hex) { char buf[32]; size_t len; len = snprintf(buf, sizeof(buf) - 1, "%02lx", hex); write_data_raw((uint8_t*)buf, len); } void GdbConnection::write_packet_bytes(const uint8_t* data, size_t num_bytes) { uint8_t checksum; size_t i; write_data_raw((uint8_t*)"$", 1); for (i = 0, checksum = 0; i < num_bytes; ++i) { checksum += data[i]; } write_data_raw((uint8_t*)data, num_bytes); write_data_raw((uint8_t*)"#", 1); write_hex(checksum); } void GdbConnection::write_packet(const char* data) { return write_packet_bytes((const uint8_t*)data, strlen(data)); } void GdbConnection::write_binary_packet(const char* pfx, const uint8_t* data, ssize_t num_bytes) { ssize_t pfx_num_chars = strlen(pfx); uint8_t buf[2 * num_bytes + pfx_num_chars]; ssize_t buf_num_bytes = 0; int i; strncpy((char*)buf, pfx, sizeof(buf) - 1); buf_num_bytes += pfx_num_chars; for (i = 0; i < num_bytes; ++i) { uint8_t b = data[i]; if (buf_num_bytes + 2 > ssize_t(sizeof(buf))) { break; } switch (b) { case '#': case '$': case '}': case '*': buf[buf_num_bytes++] = '}'; buf[buf_num_bytes++] = b ^ 0x20; break; default: buf[buf_num_bytes++] = b; break; } } LOG(debug) << " ***** NOTE: writing binary data, upcoming debug output may " "be truncated"; return write_packet_bytes(buf, buf_num_bytes); } void GdbConnection::write_hex_bytes_packet(const uint8_t* bytes, size_t len) { if (0 == len) { write_packet(""); return; } char buf[2 * len + 1]; for (size_t i = 0; i < len; ++i) { unsigned long b = bytes[i]; snprintf(&buf[2 * i], 3, "%02lx", b); } write_packet(buf); } static void parser_assert(bool cond) { if (!cond) { fputs("Failed to parse gdb request\n", stderr); assert(false); exit(2); } } static string 
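/**
 * A minimal worked example of the hex-ASCII decoding done below: the vRun
 * argument "6331" decodes to the two characters "c1", i.e. "restart from
 * checkpoint 1".
 */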
decode_ascii_encoded_hex_str(const char* encoded) { ssize_t enc_len = strlen(encoded); parser_assert(enc_len % 2 == 0); string str; for (int i = 0; i < enc_len / 2; ++i) { char enc_byte[] = { encoded[2 * i], encoded[2 * i + 1], '\0' }; char* endp; int c = strtol(enc_byte, &endp, 16); parser_assert(c < 128); str += static_cast(c); } return str; } bool GdbConnection::skip_to_packet_start() { uint8_t* p = nullptr; int i; /* XXX we want memcspn() here ... */ for (i = 0; i < inlen; ++i) { if (inbuf[i] == '$' || inbuf[i] == INTERRUPT_CHAR) { p = &inbuf[i]; break; } } if (!p) { /* Discard all read bytes, which we don't care * about. */ inlen = 0; return false; } /* Discard bytes up to start-of-packet. */ memmove(inbuf, p, inlen - (p - inbuf)); inlen -= (p - inbuf); parser_assert(1 <= inlen); parser_assert('$' == inbuf[0] || INTERRUPT_CHAR == inbuf[0]); return true; } bool GdbConnection::sniff_packet() { if (skip_to_packet_start()) { /* We've already seen a (possibly partial) packet. */ return true; } parser_assert(0 == inlen); return poll_incoming(sock_fd, 0 /*don't wait*/); } void GdbConnection::read_packet() { uint8_t* p; size_t checkedlen; /* Read and discard bytes until we see the start of a * packet. * * NB: we're ignoring "+/-" responses from gdb. There doesn't * seem to be any sane reason why we would send a damaged * packet to gdb over TCP, then see a "-" reply from gdb and * somehow magically fix our bug that led to the malformed * packet in the first place. */ while (!skip_to_packet_start()) { read_data_once(); } if (inbuf[0] == INTERRUPT_CHAR) { /* Interrupts are kind of an ugly duckling in the gdb * protocol ... */ packetend = 1; return; } /* Read until we see end-of-packet. */ for (checkedlen = 0; !(p = (uint8_t*)memchr(inbuf + checkedlen, '#', inlen)); checkedlen = inlen) { read_data_once(); } packetend = (p - inbuf); /* NB: we're ignoring the gdb packet checksums here too. If * gdb is corrupted enough to garble a checksum over TCP, it's * not really clear why asking for the packet again might make * the bug go away. */ parser_assert('$' == inbuf[0] && packetend < inlen); /* Acknowledge receipt of the packet. */ if (!no_ack) { write_data_raw((uint8_t*)"+", 1); write_flush(); } } static void read_binary_data(const uint8_t* payload, const uint8_t* payload_end, vector& data) { data.clear(); while (payload < payload_end) { uint8_t b = *payload++; if ('}' == b) { parser_assert(payload < payload_end); b = 0x20 ^ *payload++; } data.push_back(b); } } /** * Parse and return a gdb thread-id from |str|. |endptr| points to * the character just after the last character in the thread-id. It * may be nullptr. */ static GdbThreadId parse_threadid(const char* str, char** endptr) { GdbThreadId t; char* endp; if ('p' == *str) { ++str; } t.pid = strtol(str, &endp, 16); parser_assert(endp); if ('\0' == *endp) { t.tid = -1; *endptr = endp; return t; } parser_assert('.' 
== *endp); str = endp + 1; t.tid = strtol(str, &endp, 16); *endptr = endp; return t; } bool GdbConnection::xfer(const char* name, char* args) { LOG(debug) << "gdb asks us to transfer " << name << "(" << args << ")"; if (!strcmp(name, "auxv")) { parser_assert(!strncmp(args, "read::", sizeof("read::") - 1)); req = GdbRequest(DREQ_GET_AUXV); req.target = query_thread; return true; } if (name == strstr(name, "siginfo")) { if (args == strstr(args, "read")) { req = GdbRequest(DREQ_READ_SIGINFO); req.target = query_thread; args += strlen("read"); parser_assert(':' == *args++); parser_assert(':' == *args++); req.mem().addr = strtoul(args, &args, 16); parser_assert(',' == *args++); req.mem().len = strtoul(args, &args, 16); parser_assert('\0' == *args); return true; } if (args == strstr(args, "write")) { req = GdbRequest(DREQ_WRITE_SIGINFO); req.target = query_thread; return true; } UNHANDLED_REQ() << "Unhandled 'siginfo' request: " << args; return false; } UNHANDLED_REQ() << "Unhandled gdb xfer request: " << name << "(" << args << ")"; return false; } /** * Format |value| into |buf| in the manner gdb expects. |buf| must * point at a buffer with at least |1 + 2*DBG_MAX_REG_SIZE| bytes * available. Fewer bytes than that may be written, but |buf| is * guaranteed to be null-terminated. */ static size_t print_reg_value(const GdbRegisterValue& reg, char* buf) { parser_assert(reg.size <= GdbRegisterValue::MAX_SIZE); if (reg.defined) { /* gdb wants the register value in native endianness. * reg.value read in native endianness is exactly that. */ for (size_t i = 0; i < reg.size; ++i) { snprintf(&buf[2 * i], 3, "%02lx", (unsigned long)reg.value[i]); } } else { for (size_t i = 0; i < reg.size; ++i) { strcpy(&buf[2 * i], "xx"); } } return reg.size * 2; } /** * Read the encoded register value in |strp| into |reg|. |strp| may * be mutated. */ static void read_reg_value(char** strp, GdbRegisterValue* reg) { char* str = *strp; if ('x' == str[0]) { reg->defined = false; reg->size = 0; return; } reg->defined = true; reg->size = strlen(str) / 2; for (size_t i = 0; i < reg->size; ++i) { char tmp = str[2]; str[2] = '\0'; reg->value[i] = strtoul(str, &str, 16); parser_assert('\0' == *str); str[0] = tmp; } *strp = str; } bool GdbConnection::query(char* payload) { const char* name; char* args; args = strchr(payload, ':'); if (args) { *args++ = '\0'; } name = payload; if (strstr(name, "RRCmd") == name) { LOG(debug) << "gdb requests rr cmd: " << name; req = GdbRequest(DREQ_RR_CMD); req.text_ = args; return true; } if (!strcmp(name, "C")) { LOG(debug) << "gdb requests current thread ID"; req = GdbRequest(DREQ_GET_CURRENT_THREAD); return true; } if (!strcmp(name, "Attached")) { LOG(debug) << "gdb asks if this is a new or existing process"; /* Tell gdb this is an existing process; it might be * (see emergency_debug()). */ write_packet("1"); return false; } if (!strcmp(name, "fThreadInfo")) { LOG(debug) << "gdb asks for thread list"; req = GdbRequest(DREQ_GET_THREAD_LIST); return true; } if (!strcmp(name, "sThreadInfo")) { write_packet("l"); /* "end of list" */ return false; } if (!strcmp(name, "GetTLSAddr")) { LOG(debug) << "gdb asks for TLS addr"; /* TODO */ write_packet(""); return false; } if (!strcmp(name, "Offsets")) { LOG(debug) << "gdb asks for section offsets"; req = GdbRequest(DREQ_GET_OFFSETS); req.target = query_thread; return true; } if ('P' == name[0]) { /* The docs say not to use this packet ... 
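 * (gdb's protocol docs deprecate qP in favor of qThreadExtraInfo, so an
 * empty reply, meaning "unsupported", is the right answer here.)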
*/ write_packet(""); return false; } if (!strcmp(name, "Supported")) { /* TODO process these */ LOG(debug) << "gdb supports " << args; stringstream supported; supported << "PacketSize=" << sizeof(outbuf); supported << ";QStartNoAckMode+" ";qXfer:auxv:read+" ";qXfer:siginfo:read+" ";qXfer:siginfo:write+" ";multiprocess+" ";ConditionalBreakpoints+"; if (features().reverse_execution) { supported << ";ReverseContinue+" ";ReverseStep+"; } write_packet(supported.str().c_str()); return false; } if (!strcmp(name, "Symbol")) { LOG(debug) << "gdb is ready for symbol lookups"; write_packet("OK"); return false; } if (strstr(name, "ThreadExtraInfo") == name) { // ThreadExtraInfo is a special snowflake that // delimits its args with ','. parser_assert(!args); args = payload; args = 1 + strchr(args, ',' /*sic*/); req = GdbRequest(DREQ_GET_THREAD_EXTRA_INFO); req.target = parse_threadid(args, &args); parser_assert('\0' == *args); return true; } if (!strcmp(name, "TStatus")) { LOG(debug) << "gdb asks for trace status"; /* XXX from the docs, it appears that we should reply * with "T0" here. But if we do, gdb keeps bothering * us with trace queries. So pretend we don't know * what it's talking about. */ write_packet(""); return false; } if (!strcmp(name, "Xfer")) { name = args; args = strchr(args, ':'); if (args) { *args++ = '\0'; } return xfer(name, args); } if (!strcmp(name, "Search")) { name = args; args = strchr(args, ':'); if (args) { *args++ = '\0'; } if (!strcmp(name, "memory") && args) { req = GdbRequest(DREQ_SEARCH_MEM); req.target = query_thread; req.mem().addr = strtoul(args, &args, 16); parser_assert(';' == *args++); req.mem().len = strtoul(args, &args, 16); parser_assert(';' == *args++); read_binary_data((const uint8_t*)args, inbuf + packetend, req.mem().data); LOG(debug) << "gdb searching memory (addr=" << HEX(req.mem().addr) << ", len=" << req.mem().len << ")"; return true; } write_packet(""); return false; } UNHANDLED_REQ() << "Unhandled gdb query: q" << name; return false; } bool GdbConnection::set_var(char* payload) { const char* name; char* args; args = strchr(payload, ':'); if (args) { *args++ = '\0'; } name = payload; if (!strcmp(name, "StartNoAckMode")) { write_packet("OK"); no_ack = true; return false; } UNHANDLED_REQ() << "Unhandled gdb set: Q" << name; return false; } void GdbConnection::consume_request() { req = GdbRequest(); write_flush(); } bool GdbConnection::process_bpacket(char* payload) { if (strcmp(payload, "c") == 0) { req = GdbRequest(DREQ_CONT); req.cont().run_direction = RUN_BACKWARD; req.cont().actions.push_back(GdbContAction(ACTION_CONTINUE, resume_thread)); return true; } else if (strcmp(payload, "s") == 0) { req = GdbRequest(DREQ_CONT); req.cont().run_direction = RUN_BACKWARD; req.cont().actions.push_back(GdbContAction(ACTION_STEP, resume_thread)); return true; } else { UNHANDLED_REQ() << "Unhandled gdb bpacket: b" << payload; return false; } } bool GdbConnection::process_vpacket(char* payload) { const char* name; char* args; args = strchr(payload, ';'); if (args) { *args++ = '\0'; } name = payload; if (!strcmp("Cont", name)) { vector actions; bool has_default_action = false; GdbContAction default_action; while (args) { char* cmd = args; while (*args != ':' && *args != ';') { if (!*args) { args = nullptr; break; } ++args; } bool is_default = true; GdbThreadId target; if (args) { if (*args == ':') { is_default = false; *args = '\0'; target = parse_threadid(args + 1, &args); } args = strchr(args, ';'); if (args) { *args = '\0'; ++args; } } GdbActionType action; int 
signal_to_deliver = 0; char* endptr = NULL; switch (cmd[0]) { case 'C': action = ACTION_CONTINUE; signal_to_deliver = strtol(cmd + 1, &endptr, 16); break; case 'c': action = ACTION_CONTINUE; break; case 'S': action = ACTION_STEP; signal_to_deliver = strtol(cmd + 1, &cmd, 16); break; case 's': action = ACTION_STEP; break; default: UNHANDLED_REQ() << "Unhandled vCont command " << cmd << "(" << args << ")"; return false; } if (endptr && *endptr) { UNHANDLED_REQ() << "Unhandled vCont command parameters " << cmd; return false; } if (is_default) { if (has_default_action) { UNHANDLED_REQ() << "Unhandled vCont command with multiple default actions"; return false; } has_default_action = true; default_action = GdbContAction(action, GdbThreadId::ALL, signal_to_deliver); } else { actions.push_back(GdbContAction(action, target, signal_to_deliver)); } } if (has_default_action) { actions.push_back(default_action); } req = GdbRequest(DREQ_CONT); req.cont().run_direction = RUN_FORWARD; req.cont().actions = move(actions); return true; } if (!strcmp("Cont?", name)) { LOG(debug) << "gdb queries which continue commands we support"; write_packet("vCont;c;C;s;S;"); return false; } if (!strcmp("Kill", name)) { // We can't kill tracees or replay can diverge. We // assume that this kill request is being made because // a "vRun" restart is coming right up. We know how // to implement vRun, so we'll ignore this one. LOG(debug) << "gdb asks us to kill tracee(s); ignoring"; write_packet("OK"); return false; } if (!strcmp("Run", name)) { req = GdbRequest(DREQ_RESTART); const char* filename = args; args = strchr(args, ';'); if (args) { *args++ = '\0'; } if (strlen(filename)) { FATAL() << "gdb wants us to run the exe image `" << filename << "', but we don't support that."; } if (!args) { req.restart().type = RESTART_FROM_PREVIOUS; return true; } const char* arg1 = args; args = strchr(args, ';'); if (args) { *args++ = 0; LOG(debug) << "Ignoring extra parameters " << args; } string event_str = decode_ascii_encoded_hex_str(arg1); char* endp; if (event_str[0] == 'c') { int param = strtol(event_str.c_str() + 1, &endp, 0); req.restart().type = RESTART_FROM_CHECKPOINT; req.restart().param_str = event_str.substr(1); req.restart().param = param; LOG(debug) << "next replayer restarting from checkpoint " << req.restart().param; } else { req.restart().type = RESTART_FROM_EVENT; req.restart().param = strtol(event_str.c_str(), &endp, 0); LOG(debug) << "next replayer advancing to event " << req.restart().param; } if (!endp || *endp != '\0') { LOG(debug) << "Couldn't parse event string `" << event_str << "'" << "; restarting from previous"; req.restart().type = RESTART_FROM_PREVIOUS; req.restart().param = -1; } return true; } if (name == strstr(name, "File:")) { write_packet(""); return false; } UNHANDLED_REQ() << "Unhandled gdb vpacket: v" << name; return false; } bool GdbConnection::process_packet() { char request; char* payload = nullptr; bool ret; parser_assert(INTERRUPT_CHAR == inbuf[0] || ('$' == inbuf[0] && (((uint8_t*)memchr(inbuf, '#', inlen) - inbuf) == packetend))); if (INTERRUPT_CHAR == inbuf[0]) { request = INTERRUPT_CHAR; } else { request = inbuf[1]; payload = (char*)&inbuf[2]; inbuf[packetend] = '\0'; } LOG(debug) << "raw request " << request << payload; switch (request) { case INTERRUPT_CHAR: LOG(debug) << "gdb requests interrupt"; req = GdbRequest(DREQ_INTERRUPT); ret = true; break; case 'b': ret = process_bpacket(payload); break; case 'D': LOG(debug) << "gdb is detaching from us"; req = GdbRequest(DREQ_DETACH); ret = 
true; break; case 'g': req = GdbRequest(DREQ_GET_REGS); req.target = query_thread; LOG(debug) << "gdb requests registers"; ret = true; break; case 'G': /* XXX we can't let gdb spray registers in general, * because it may cause replay to diverge. But some * writes may be OK. Let's see how far we can get * with ignoring these requests. */ write_packet(""); ret = false; break; case 'H': if ('c' == *payload++) { req = GdbRequest(DREQ_SET_CONTINUE_THREAD); } else { req = GdbRequest(DREQ_SET_QUERY_THREAD); } req.target = parse_threadid(payload, &payload); parser_assert('\0' == *payload); LOG(debug) << "gdb selecting " << req.target; ret = true; break; case 'k': LOG(info) << "gdb requests kill, exiting"; write_packet("OK"); exit(0); case 'm': req = GdbRequest(DREQ_GET_MEM); req.target = query_thread; req.mem().addr = strtoul(payload, &payload, 16); parser_assert(',' == *payload++); req.mem().len = strtoul(payload, &payload, 16); parser_assert('\0' == *payload); LOG(debug) << "gdb requests memory (addr=" << HEX(req.mem().addr) << ", len=" << req.mem().len << ")"; ret = true; break; case 'M': /* We can't allow the debugger to write arbitrary data * to memory, or the replay may diverge. */ // TODO: parse this packet in case some oddball gdb // decides to send it instead of 'X' write_packet(""); ret = false; break; case 'p': req = GdbRequest(DREQ_GET_REG); req.target = query_thread; req.reg().name = GdbRegister(strtoul(payload, &payload, 16)); parser_assert('\0' == *payload); LOG(debug) << "gdb requests register value (" << req.reg().name << ")"; ret = true; break; case 'P': req = GdbRequest(DREQ_SET_REG); req.target = query_thread; req.reg().name = GdbRegister(strtoul(payload, &payload, 16)); parser_assert('=' == *payload++); read_reg_value(&payload, &req.reg()); parser_assert('\0' == *payload); ret = true; break; case 'q': ret = query(payload); break; case 'Q': ret = set_var(payload); break; case 'T': req = GdbRequest(DREQ_GET_IS_THREAD_ALIVE); req.target = parse_threadid(payload, &payload); parser_assert('\0' == *payload); LOG(debug) << "gdb wants to know if " << req.target << " is alive"; ret = true; break; case 'v': ret = process_vpacket(payload); break; case 'X': { req = GdbRequest(DREQ_SET_MEM); req.target = query_thread; req.mem().addr = strtoul(payload, &payload, 16); parser_assert(',' == *payload++); req.mem().len = strtoul(payload, &payload, 16); parser_assert(':' == *payload++); read_binary_data((const uint8_t*)payload, inbuf + packetend, req.mem().data); parser_assert(req.mem().len == req.mem().data.size()); LOG(debug) << "gdb setting memory (addr=" << HEX(req.mem().addr) << ", len=" << req.mem().len << ")"; ret = true; break; } case 'z': case 'Z': { int type = strtol(payload, &payload, 16); parser_assert(',' == *payload++); if (!(0 <= type && type <= 4)) { LOG(warn) << "Unknown watch type " << type; write_packet(""); ret = false; break; } req = GdbRequest(GdbRequestType( type + (request == 'Z' ? 
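            /* Z/z type codes 0-4 mean: software breakpoint, hardware
               breakpoint, write watchpoint, read watchpoint, access
               watchpoint. The DREQ_SET_SW_BREAK and DREQ_REMOVE_SW_BREAK
               enum runs are laid out in that same order, so adding |type|
               selects the matching request. */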
DREQ_SET_SW_BREAK : DREQ_REMOVE_SW_BREAK))); req.watch().addr = strtoul(payload, &payload, 16); parser_assert(',' == *payload); payload++; req.watch().kind = strtoul(payload, &payload, 16); if (';' == *payload) { ++payload; while ('X' == *payload) { ++payload; int len = strtol(payload, &payload, 16); parser_assert(',' == *payload); payload++; vector bytes; for (int i = 0; i < len; ++i) { parser_assert(payload[0] && payload[1]); char tmp = payload[2]; payload[2] = '\0'; bytes.push_back(strtol(payload, &payload, 16)); parser_assert('\0' == *payload); payload[0] = tmp; } req.watch().conditions.push_back(move(bytes)); } } parser_assert('\0' == *payload); LOG(debug) << "gdb requests " << ('Z' == request ? "set" : "remove") << "breakpoint (addr=" << HEX(req.watch().addr) << ", len=" << req.watch().kind << ")"; ret = true; break; } case '!': LOG(debug) << "gdb requests extended mode"; write_packet("OK"); ret = false; break; case '?': LOG(debug) << "gdb requests stop reason"; req = GdbRequest(DREQ_GET_STOP_REASON); req.target = query_thread; ret = true; break; default: UNHANDLED_REQ() << "Unhandled gdb request '" << inbuf[1] << "'"; ret = false; } /* Erase the newly processed packet from the input buffer. */ memmove(inbuf, inbuf + packetend, inlen - packetend); inlen = (inlen - packetend); /* If we processed the request internally, consume it. */ if (!ret) { consume_request(); } return ret; } void GdbConnection::notify_no_such_thread(const GdbRequest& req) { assert(!memcmp(&req, &this->req, sizeof(this->req))); /* '10' is the errno ECHILD. We use it as a magic code to * notify the user that the thread that was the target of this * request has died, and either gdb didn't notice that, or rr * didn't notify gdb. Either way, the user should restart * their debugging session. */ LOG(error) << "Targeted thread no longer exists; this is the result of " "either a gdb or\n" "rr bug. Please restart your debugging session and avoid " "doing whatever\n" "triggered this bug."; write_packet("E10"); consume_request(); } void GdbConnection::notify_restart() { assert(DREQ_RESTART == req.type); // These threads may not exist at the first trace-stop after // restart. The gdb client should reset this state, but help // it out just in case. resume_thread = GdbThreadId::ANY; query_thread = GdbThreadId::ANY; req = GdbRequest(); } GdbRequest GdbConnection::get_request() { if (DREQ_RESTART == req.type) { LOG(debug) << "consuming RESTART request"; notify_restart(); // gdb wants to be notified with a stop packet when // the process "relaunches". In rr's case, the // traceee may be very far away from process creation, // but that's OK. req = GdbRequest(DREQ_GET_STOP_REASON); req.target = query_thread; return req; } /* Can't ask for the next request until you've satisfied the * current one, for requests that need an immediate * response. */ assert(!request_needs_immediate_response(&req)); if (!sniff_packet() && req.is_resume_request()) { /* There's no new request data available and gdb has * already asked us to resume. OK, do that (or keep * doing that) now. */ return req; } while (true) { /* There's either new request data, or we have nothing * to do. Either way, block until we read a complete * packet from gdb. */ read_packet(); if (process_packet()) { /* We couldn't process the packet internally, * so the target has to do something. */ return req; } /* The packet we got was "internal", gdb details. * Nothing for the target to do yet. Keep waiting. 
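 * (For example, qSupported and qsThreadInfo are answered directly in
 * query() and never surface as a GdbRequest.)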
*/ } } void GdbConnection::notify_exit_code(int code) { char buf[64]; assert(req.is_resume_request() || req.type == DREQ_INTERRUPT); snprintf(buf, sizeof(buf) - 1, "W%02x", code); write_packet(buf); consume_request(); } void GdbConnection::notify_exit_signal(int sig) { char buf[64]; assert(req.is_resume_request() || req.type == DREQ_INTERRUPT); snprintf(buf, sizeof(buf) - 1, "X%02x", sig); write_packet(buf); consume_request(); } /** * Translate linux-x86 |sig| to gdb's internal numbering. Translation * made according to gdb/include/gdb/signals.def. */ static int to_gdb_signum(int sig) { switch (sig) { case 0: return 0; case SIGHUP: return 1; case SIGINT: return 2; case SIGQUIT: return 3; case SIGILL: return 4; case SIGTRAP: return 5; case SIGABRT /*case SIGIOT*/: return 6; case SIGBUS: return 10; case SIGFPE: return 8; case SIGKILL: return 9; case SIGUSR1: return 30; case SIGSEGV: return 11; case SIGUSR2: return 31; case SIGPIPE: return 13; case SIGALRM: return 14; case SIGTERM: return 15; /* gdb hasn't heard of SIGSTKFLT, so this is * arbitrarily made up. SIGDANGER just sounds cool.*/ case SIGSTKFLT: return 38 /*GDB_SIGNAL_DANGER*/; /*case SIGCLD*/ case SIGCHLD: return 20; case SIGCONT: return 19; case SIGSTOP: return 17; case SIGTSTP: return 18; case SIGTTIN: return 21; case SIGTTOU: return 22; case SIGURG: return 16; case SIGXCPU: return 24; case SIGXFSZ: return 25; case SIGVTALRM: return 26; case SIGPROF: return 27; case SIGWINCH: return 28; /*case SIGPOLL*/ case SIGIO: return 23; case SIGPWR: return 32; case SIGSYS: return 12; case 32: return 77; default: if (33 <= sig && sig <= 63) { /* GDB_SIGNAL_REALTIME_33 is numbered 45, hence this offset. */ return sig + 12; } if (64 <= sig && sig <= 127) { /* GDB_SIGNAL_REALTIME_64 is numbered 78, hence this offset. */ return sig + 14; } LOG(warn) << "Unknown signal " << sig; return 143; // GDB_SIGNAL_UNKNOWN } } void GdbConnection::send_stop_reply_packet(GdbThreadId thread, int sig, uintptr_t watch_addr) { if (sig < 0) { write_packet("E01"); return; } char watch[1024]; if (watch_addr) { snprintf(watch, sizeof(watch) - 1, "watch:%" PRIxPTR ";", watch_addr); } else { watch[0] = '\0'; } char buf[PATH_MAX]; snprintf(buf, sizeof(buf) - 1, "T%02xthread:p%02x.%02x;%s", to_gdb_signum(sig), thread.pid, thread.tid, watch); write_packet(buf); } void GdbConnection::notify_stop(GdbThreadId thread, int sig, uintptr_t watch_addr) { assert(req.is_resume_request() || req.type == DREQ_INTERRUPT); if (tgid != thread.pid) { LOG(debug) << "ignoring stop of " << thread << " because we're debugging tgid " << tgid; // Re-use the existing continue request to advance to // the next stop we're willing to tell gdb about. return; } send_stop_reply_packet(thread, sig, watch_addr); // This isn't documented in the gdb remote protocol, but if we // don't do this, gdb will sometimes continue to send requests // for the previously-stopped thread when it obviously intends // to be making requests about the stopped thread. // Setting to ANY here will make the client choose the correct default thread. LOG(debug) << "forcing query/resume thread to ANY"; query_thread = GdbThreadId::ANY; resume_thread = GdbThreadId::ANY; consume_request(); } void GdbConnection::notify_restart_failed() { assert(DREQ_RESTART == req.type); // TODO: it's not known by this author whether gdb knows how // to recover from a failed "run" request. 
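  /* "E01" is an arbitrary error code: the remote protocol only specifies
     the leading 'E' plus two hex digits for an error reply. */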
write_packet("E01"); consume_request(); } void GdbConnection::reply_get_current_thread(GdbThreadId thread) { assert(DREQ_GET_CURRENT_THREAD == req.type); char buf[1024]; snprintf(buf, sizeof(buf), "QCp%02x.%02x", thread.pid, thread.tid); write_packet(buf); consume_request(); } void GdbConnection::reply_get_auxv(const vector& auxv) { assert(DREQ_GET_AUXV == req.type); if (!auxv.empty()) { write_binary_packet("l", auxv.data(), auxv.size()); } else { write_packet("E01"); } consume_request(); } void GdbConnection::reply_get_is_thread_alive(bool alive) { assert(DREQ_GET_IS_THREAD_ALIVE == req.type); write_packet(alive ? "OK" : "E01"); consume_request(); } void GdbConnection::reply_get_thread_extra_info(const string& info) { assert(DREQ_GET_THREAD_EXTRA_INFO == req.type); LOG(debug) << "thread extra info: '" << info.c_str() << "'"; write_hex_bytes_packet((const uint8_t*)info.c_str(), 1 + info.length()); consume_request(); } void GdbConnection::reply_select_thread(bool ok) { assert(DREQ_SET_CONTINUE_THREAD == req.type || DREQ_SET_QUERY_THREAD == req.type); if (ok && DREQ_SET_CONTINUE_THREAD == req.type) { resume_thread = req.target; } else if (ok && DREQ_SET_QUERY_THREAD == req.type) { query_thread = req.target; } write_packet(ok ? "OK" : "E01"); consume_request(); } void GdbConnection::reply_get_mem(const vector& mem) { assert(DREQ_GET_MEM == req.type); assert(mem.size() <= req.mem().len); if (req.mem().len > 0 && mem.size() == 0) { write_packet("E01"); } else { write_hex_bytes_packet(mem.data(), mem.size()); } consume_request(); } void GdbConnection::reply_set_mem(bool ok) { assert(DREQ_SET_MEM == req.type); write_packet(ok ? "OK" : "E01"); consume_request(); } void GdbConnection::reply_search_mem(bool found, remote_ptr addr) { assert(DREQ_SEARCH_MEM == req.type); if (found) { char buf[256]; sprintf(buf, "1,%llx", (long long)addr.as_int()); write_packet(buf); } else { write_packet("0"); } consume_request(); } void GdbConnection::reply_get_offsets(/* TODO */) { assert(DREQ_GET_OFFSETS == req.type); /* XXX FIXME TODO */ write_packet(""); consume_request(); } void GdbConnection::reply_get_reg(const GdbRegisterValue& reg) { char buf[2 * GdbRegisterValue::MAX_SIZE + 1]; assert(DREQ_GET_REG == req.type); print_reg_value(reg, buf); write_packet(buf); consume_request(); } void GdbConnection::reply_get_regs(const GdbRegisterFile& file) { size_t n_regs = file.total_registers(); char buf[n_regs * 2 * GdbRegisterValue::MAX_SIZE + 1]; assert(DREQ_GET_REGS == req.type); size_t offset = 0; for (auto it = file.regs.begin(), end = file.regs.end(); it != end; ++it) { offset += print_reg_value(*it, &buf[offset]); } write_packet(buf); consume_request(); } void GdbConnection::reply_set_reg(bool ok) { assert(DREQ_SET_REG == req.type); // TODO: what happens if we're forced to reply to a // set-register request with |ok = false|, leading us to // pretend not to understand the packet? If, later, an // experimental session needs the set-register request will it // not be sent? // // We can't reply with an error packet here because gdb thinks // that failed set-register requests are catastrophic. write_packet(ok ? 
"OK" : ""); consume_request(); } void GdbConnection::reply_get_stop_reason(GdbThreadId which, int sig) { assert(DREQ_GET_STOP_REASON == req.type); send_stop_reply_packet(which, sig); consume_request(); } void GdbConnection::reply_get_thread_list(const vector& threads) { assert(DREQ_GET_THREAD_LIST == req.type); if (threads.empty()) { write_packet("l"); } else { ssize_t maxlen = 1 /*m char*/ + threads.size() * (1 /*p*/ + 2 * sizeof(threads[0]) + 1 /*,*/) + 1 /*\0*/; char* str = (char*)malloc(maxlen); int offset = 0; str[offset++] = 'm'; for (size_t i = 0; i < threads.size(); ++i) { const GdbThreadId& t = threads[i]; if (tgid != t.pid) { continue; } offset += snprintf(&str[offset], maxlen - offset, "p%02x.%02x,", t.pid, t.tid); } /* Overwrite the trailing ',' */ str[offset - 1] = '\0'; write_packet(str); free(str); } consume_request(); } void GdbConnection::reply_watchpoint_request(bool ok) { assert(DREQ_WATCH_FIRST <= req.type && req.type <= DREQ_WATCH_LAST); write_packet(ok ? "OK" : "E01"); consume_request(); } void GdbConnection::reply_detach() { assert(DREQ_DETACH <= req.type); write_packet("OK"); consume_request(); } void GdbConnection::reply_read_siginfo(const vector& si_bytes) { assert(DREQ_READ_SIGINFO == req.type); if (si_bytes.empty()) { write_packet("E01"); } else { write_binary_packet("l", si_bytes.data(), si_bytes.size()); } consume_request(); } void GdbConnection::reply_write_siginfo(/* TODO*/) { assert(DREQ_WRITE_SIGINFO == req.type); write_packet("E01"); consume_request(); } void GdbConnection::reply_rr_cmd(const std::string& text) { assert(DREQ_RR_CMD == req.type); write_packet(text.c_str()); consume_request(); } rr-4.1.0/src/GdbConnection.h000066400000000000000000000406111265436462100156260ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_GDB_CONNECTION_H_ #define RR_GDB_CONNECTION_H_ #include #include #include #include #include #include #include "GdbRegister.h" #include "Registers.h" #include "ReplaySession.h" #include "ReplayTimeline.h" /** * Descriptor for task within a task group. Note: on linux, we can * uniquely identify any thread by its |tid| (ignoring pid * namespaces). */ struct GdbThreadId { GdbThreadId(pid_t pid = -1, pid_t tid = -1) : pid(pid), tid(tid) {} pid_t pid; pid_t tid; bool operator==(const GdbThreadId& o) const { return pid == o.pid && tid == o.tid; } static const GdbThreadId ANY; static const GdbThreadId ALL; }; inline std::ostream& operator<<(std::ostream& o, const GdbThreadId& t) { o << t.pid << "." << t.tid; return o; } /** * Represents a possibly-undefined register |name|. |size| indicates how * many bytes of |value| are valid, if any. */ struct GdbRegisterValue { enum { MAX_SIZE = Registers::MAX_SIZE }; GdbRegister name; union { uint8_t value[MAX_SIZE]; uint8_t value1; uint16_t value2; uint32_t value4; uint64_t value8; }; size_t size; bool defined; }; /** * Represents the register file, indexed by |DbgRegister| values * above. */ struct GdbRegisterFile { std::vector regs; GdbRegisterFile(size_t n_regs) : regs(n_regs){}; size_t total_registers() const { return regs.size(); } }; enum GdbRequestType { DREQ_NONE = 0, /* None of these requests have parameters. */ DREQ_GET_CURRENT_THREAD, DREQ_GET_OFFSETS, DREQ_GET_REGS, DREQ_GET_STOP_REASON, DREQ_GET_THREAD_LIST, DREQ_INTERRUPT, DREQ_DETACH, /* These use params.target. 
*/ DREQ_GET_AUXV, DREQ_GET_IS_THREAD_ALIVE, DREQ_GET_THREAD_EXTRA_INFO, DREQ_SET_CONTINUE_THREAD, DREQ_SET_QUERY_THREAD, // gdb wants to write back siginfo_t to a tracee. More // importantly, this packet arrives before an experiment // session for a |call foo()| is about to be torn down. // // TODO: actual interface NYI. DREQ_WRITE_SIGINFO, /* These use params.mem. */ DREQ_GET_MEM, DREQ_SET_MEM, // gdb wants to read the current siginfo_t for a stopped // tracee. More importantly, this packet arrives at the very // beginning of a |call foo()| experiment. // // Uses .mem for offset/len. DREQ_READ_SIGINFO, DREQ_SEARCH_MEM, DREQ_MEM_FIRST = DREQ_GET_MEM, DREQ_MEM_LAST = DREQ_SEARCH_MEM, DREQ_REMOVE_SW_BREAK, DREQ_REMOVE_HW_BREAK, DREQ_REMOVE_WR_WATCH, DREQ_REMOVE_RD_WATCH, DREQ_REMOVE_RDWR_WATCH, DREQ_SET_SW_BREAK, DREQ_SET_HW_BREAK, DREQ_SET_WR_WATCH, DREQ_SET_RD_WATCH, DREQ_SET_RDWR_WATCH, DREQ_WATCH_FIRST = DREQ_REMOVE_SW_BREAK, DREQ_WATCH_LAST = DREQ_SET_RDWR_WATCH, /* Use params.reg. */ DREQ_GET_REG, DREQ_SET_REG, DREQ_REG_FIRST = DREQ_GET_REG, DREQ_REG_LAST = DREQ_SET_REG, /* Use params.cont. */ DREQ_CONT, /* gdb host detaching from stub. No parameters. */ /* Uses params.restart. */ DREQ_RESTART, /* Uses params.text. */ DREQ_RR_CMD, }; enum GdbRestartType { RESTART_FROM_PREVIOUS, RESTART_FROM_EVENT, RESTART_FROM_CHECKPOINT, }; enum GdbActionType { ACTION_CONTINUE, ACTION_STEP }; struct GdbContAction { GdbContAction(GdbActionType type = ACTION_CONTINUE, const GdbThreadId& target = GdbThreadId::ANY, int signal_to_deliver = 0) : type(type), target(target), signal_to_deliver(signal_to_deliver) {} GdbActionType type; GdbThreadId target; int signal_to_deliver; }; /** * These requests are made by the debugger host and honored in proxy * by rr, the target. */ struct GdbRequest { GdbRequest(GdbRequestType type = DREQ_NONE) : type(type), suppress_debugger_stop(false) {} GdbRequest(const GdbRequest& other) : type(other.type), target(other.target), suppress_debugger_stop(other.suppress_debugger_stop), mem_(other.mem_), watch_(other.watch_), reg_(other.reg_), restart_(other.restart_), cont_(other.cont_), text_(other.text_) {} GdbRequest& operator=(const GdbRequest& other) { this->~GdbRequest(); new (this) GdbRequest(other); return *this; } const GdbRequestType type; GdbThreadId target; bool suppress_debugger_stop; struct Mem { uintptr_t addr; size_t len; // For SET_MEM requests, the |len| raw bytes that are to be written. // For SEARCH_MEM requests, the bytes to search for. 
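    // (In both cases the bytes have already been unescaped from the
    // binary packet encoding by read_binary_data().)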
std::vector data; } mem_; struct Watch { uintptr_t addr; int kind; std::vector > conditions; } watch_; GdbRegisterValue reg_; struct Restart { int param; std::string param_str; GdbRestartType type; } restart_; struct Cont { RunDirection run_direction; std::vector actions; } cont_; std::string text_; Mem& mem() { assert(type >= DREQ_MEM_FIRST && type <= DREQ_MEM_LAST); return mem_; } const Mem& mem() const { assert(type >= DREQ_MEM_FIRST && type <= DREQ_MEM_LAST); return mem_; } Watch& watch() { assert(type >= DREQ_WATCH_FIRST && type <= DREQ_WATCH_LAST); return watch_; } const Watch& watch() const { assert(type >= DREQ_WATCH_FIRST && type <= DREQ_WATCH_LAST); return watch_; } GdbRegisterValue& reg() { assert(type >= DREQ_REG_FIRST && type <= DREQ_REG_LAST); return reg_; } const GdbRegisterValue& reg() const { assert(type >= DREQ_REG_FIRST && type <= DREQ_REG_LAST); return reg_; } Restart& restart() { assert(type == DREQ_RESTART); return restart_; } const Restart& restart() const { assert(type == DREQ_RESTART); return restart_; } Cont& cont() { assert(type == DREQ_CONT); return cont_; } const Cont& cont() const { assert(type == DREQ_CONT); return cont_; } const std::string& text() const { assert(type == DREQ_RR_CMD); return text_; } /** * Return nonzero if this requires that program execution be resumed * in some way. */ bool is_resume_request() const { return type == DREQ_CONT; } }; /** * This struct wraps up the state of the gdb protocol, so that we can * offer a (mostly) stateless interface to clients. */ class GdbConnection { public: /** * Wait for exactly one gdb host to connect to this remote target on * IP address 127.0.0.1, port |port|. If |probe| is nonzero, a unique * port based on |start_port| will be searched for. Otherwise, if * |port| is already bound, this function will fail. * * Pass the |tgid| of the task on which this debug-connection request * is being made. The remaining debugging session will be limited to * traffic regarding |tgid|, but clients don't need to and shouldn't * need to assume that. * * If we're opening this connection on behalf of a known client, pass * an fd in |client_params_fd|; we'll write the allocated port and |exe_image| * through the fd before waiting for a connection. |exe_image| is the * process that will be debugged by client, or null ptr if there isn't * a client. * * This function is infallible: either it will return a valid * debugging context, or it won't return. */ enum ProbePort { DONT_PROBE = 0, PROBE_PORT }; struct Features { Features() : reverse_execution(true) {} bool reverse_execution; }; static std::unique_ptr await_client_connection( unsigned short desired_port, ProbePort probe, pid_t tgid, const std::string& exe_image, const Features& features, ScopedFd* client_params_fd = nullptr); /** * Exec gdb using the params that were written to * |params_pipe_fd|. Optionally, pre-define in the gdb client the set * of macros defined in |macros| if nonnull. */ static void launch_gdb(ScopedFd& params_pipe_fd, const std::string& macros, const std::string& gdb_command_file_path, const std::string& gdb_binary_file_path); /** * Call this when the target of |req| is needed to fulfill the * request, but the target is dead. This situation is a symptom of a * gdb or rr bug. */ void notify_no_such_thread(const GdbRequest& req); /** * Finish a DREQ_RESTART request. Should be invoked after replay * restarts and prior GdbConnection has been restored. 
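 * (get_request() then synthesizes a DREQ_GET_STOP_REASON so gdb receives
 * the stop reply it expects after a vRun "relaunch".)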
*/ void notify_restart(); /** * Return the current request made by the debugger host, that needs to * be satisfied. This function will block until either there's a * debugger host request that needs a response, or until a request is * made to resume execution of the target. In the latter case, * calling this function multiple times will return an appropriate * resume request each time (see above). * * The target should peek at the debugger request in between execution * steps. A new request may need to be serviced. */ GdbRequest get_request(); /** * Notify the host that this process has exited with |code|. */ void notify_exit_code(int code); /** * Notify the host that this process has exited from |sig|. */ void notify_exit_signal(int sig); /** * Notify the host that a resume request has "finished", i.e., the * target has stopped executing for some reason. |sig| is the signal * that stopped execution, or 0 if execution stopped otherwise. */ void notify_stop(GdbThreadId which, int sig, uintptr_t watch_addr = 0); /** Notify the debugger that a restart request failed. */ void notify_restart_failed(); /** * Tell the host that |thread| is the current thread. */ void reply_get_current_thread(GdbThreadId thread); /** * Reply with the target thread's |auxv| pairs. |auxv.empty()| * if there was an error reading the auxiliary vector. */ void reply_get_auxv(const std::vector& auxv); /** * |alive| is true if the requested thread is alive, false if dead. */ void reply_get_is_thread_alive(bool alive); /** * |info| is a string containing data about the request target that * might be relevant to the debugger user. */ void reply_get_thread_extra_info(const std::string& info); /** * |ok| is true if req->target can be selected, false otherwise. */ void reply_select_thread(bool ok); /** * The first |mem.size()| bytes of the request were read into |mem|. * |mem.size()| must be less than or equal to the length of the request. */ void reply_get_mem(const std::vector& mem); /** * |ok| is true if a SET_MEM request succeeded, false otherwise. This * function *must* be called whenever a SET_MEM request is made, * regardless of success/failure or special interpretation. */ void reply_set_mem(bool ok); /** * Reply to the DREQ_SEARCH_MEM request. * |found| is true if we found the searched-for bytes starting at address * |addr|. */ void reply_search_mem(bool found, remote_ptr addr); /** * Reply to the DREQ_GET_OFFSETS request. */ void reply_get_offsets(/* TODO */); /** * Send |value| back to the debugger host. |value| may be undefined. */ void reply_get_reg(const GdbRegisterValue& value); /** * Send |file| back to the debugger host. |file| may contain * undefined register values. */ void reply_get_regs(const GdbRegisterFile& file); /** * Pass |ok = true| iff the requested register was successfully set. */ void reply_set_reg(bool ok); /** * Reply to the DREQ_GET_STOP_REASON request. */ void reply_get_stop_reason(GdbThreadId which, int sig); /** * |threads| contains the list of live threads, of which there are * |len|. */ void reply_get_thread_list(const std::vector& threads); /** * |ok| is true if the request was successfully applied, false if * not. */ void reply_watchpoint_request(bool ok); /** * DREQ_DETACH was processed. * * There's no functional reason to reply to the detach request. * However, some versions of gdb expect a response and time out * awaiting it, wasting developer time. 
*/ void reply_detach(); /** * Pass the siginfo_t and its size (as requested by the debugger) in * |si_bytes| and |num_bytes| if successfully read. Otherwise pass * |si_bytes = nullptr|. */ void reply_read_siginfo(const std::vector& si_bytes); /** * Not yet implemented, but call this after a WRITE_SIGINFO request * anyway. */ void reply_write_siginfo(/* TODO*/); /** * Send a manual text response to a rr cmd (maintenance) packet. */ void reply_rr_cmd(const std::string& text); /** * Create a checkpoint of the given Session with the given id. Delete the * existing checkpoint with that id if there is one. */ void created_checkpoint(ReplaySession::shr_ptr& checkpoint, int checkpoint_id); /** * Delete the checkpoint with the given id. Silently fail if the checkpoint * does not exist. */ void delete_checkpoint(int checkpoint_id); /** * Get the checkpoint with the given id. Return null if not found. */ ReplaySession::shr_ptr get_checkpoint(int checkpoint_id); /** * Return true if there's a new packet to be read/process (whether * incomplete or not), and false if there isn't one. */ bool sniff_packet(); const Features& features() { return features_; } private: GdbConnection(pid_t tgid, const Features& features); /** * Wait for a debugger client to connect to |dbg|'s socket. Blocks * indefinitely. */ void await_debugger(ScopedFd& listen_fd); /** * read() incoming data exactly one time, successfully. May block. */ void read_data_once(); /** * Send all pending output to gdb. May block. */ void write_flush(); void write_data_raw(const uint8_t* data, ssize_t len); void write_hex(unsigned long hex); void write_packet_bytes(const uint8_t* data, size_t num_bytes); void write_packet(const char* data); void write_binary_packet(const char* pfx, const uint8_t* data, ssize_t num_bytes); void write_hex_bytes_packet(const uint8_t* bytes, size_t len); /** * Consume bytes in the input buffer until start-of-packet ('$') or * the interrupt character is seen. Does not block. Return true if * seen, false if not. */ bool skip_to_packet_start(); /** * Block until the sequence of bytes * * "[^$]*\$[^#]*#.*" * * has been read from the client fd. This is one (or more) gdb * packet(s). */ void read_packet(); /** * Return true if we need to do something in a debugger request, * false if we already handled the packet internally. */ bool xfer(const char* name, char* args); /** * Return true if we need to do something in a debugger request, * false if we already handled the packet internally. */ bool query(char* payload); /** * Return true if we need to do something in a debugger request, * false if we already handled the packet internally. */ bool set_var(char* payload); /** * Return true if we need to do something in a debugger request, * false if we already handled the packet internally. */ bool process_vpacket(char* payload); /** * Return true if we need to do something in a debugger request, * false if we already handled the packet internally. */ bool process_bpacket(char* payload); /** * Return true if we need to do something in a debugger request, * false if we already handled the packet internally. */ bool process_packet(); void consume_request(); void send_stop_reply_packet(GdbThreadId thread, int sig, uintptr_t watch_addr = 0); // Current request to be processed. GdbRequest req; // Thread to be resumed. GdbThreadId resume_thread; // Thread for get/set requests. 
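  // ('Hg' packets select this thread; 'Hc' selects resume_thread above.)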
GdbThreadId query_thread; // gdb and rr don't work well together in multi-process and // multi-exe-image debugging scenarios, so we pretend only // this task group exists when interfacing with gdb pid_t tgid; // true when "no-ack mode" enabled, in which we don't have // to send ack packets back to gdb. This is a huge perf win. bool no_ack; ScopedFd sock_fd; /* XXX probably need to dynamically size these */ uint8_t inbuf[32768]; /* buffered input from gdb */ ssize_t inlen; /* length of valid data */ ssize_t packetend; /* index of '#' character */ uint8_t outbuf[32768]; /* buffered output for gdb */ ssize_t outlen; Features features_; }; #endif /* RR_GDB_CONNECTION_H_ */ rr-4.1.0/src/GdbExpression.cc000066400000000000000000000261601265436462100160270ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "GdbExpression.h" #include "GdbServer.h" #include "task.h" using namespace std; #define WORKAROUND_GDB_BUGS // Extracted from // https://sourceware.org/gdb/current/onlinedocs/gdb/Bytecode-Descriptions.html enum Opcode { OP_float = 0x01, OP_add = 0x02, OP_sub = 0x03, OP_mul = 0x04, OP_div_signed = 0x05, OP_div_unsigned = 0x06, OP_rem_signed = 0x07, OP_rem_unsigned = 0x08, OP_lsh = 0x09, OP_rsh_signed = 0x0a, OP_rsh_unsigned = 0x0b, OP_trace = 0x0c, OP_trace_quick = 0x0d, OP_log_not = 0x0e, OP_bit_and = 0x0f, OP_bit_or = 0x10, OP_bit_xor = 0x11, OP_bit_not = 0x12, OP_equal = 0x13, OP_less_signed = 0x14, OP_less_unsigned = 0x15, OP_ext = 0x16, OP_ref8 = 0x17, OP_ref16 = 0x18, OP_ref32 = 0x19, OP_ref64 = 0x1a, OP_ref_float = 0x1b, OP_ref_double = 0x1c, OP_ref_long_double = 0x1d, OP_l_to_d = 0x1e, OP_d_to_l = 0x1f, OP_if_goto = 0x20, OP_goto = 0x21, OP_const8 = 0x22, OP_const16 = 0x23, OP_const32 = 0x24, OP_const64 = 0x25, OP_reg = 0x26, OP_end = 0x27, OP_dup = 0x28, OP_pop = 0x29, OP_zero_ext = 0x2a, OP_swap = 0x2b, OP_getv = 0x2c, OP_setv = 0x2d, OP_tracev = 0x2e, OP_tracenz = 0x2f, OP_trace16 = 0x30, OP_pick = 0x32, OP_rot = 0x33, OP_printf = 0x34, }; struct ExpressionState { typedef GdbExpression::Value Value; ExpressionState(const vector& bytecode) : bytecode(bytecode), pc(0), error(false), end(false) {} void set_error() { error = true; } // Methods set error to true if there's an error and return some sentinel // Value. Value pop() { if (stack.empty()) { set_error(); return Value(-1); } Value v = stack.back(); stack.pop_back(); return v; } struct BinaryOperands { BinaryOperands(int64_t a = 0, int64_t b = 0) : a(a), b(b) {} int64_t a; int64_t b; }; BinaryOperands pop_a_b() { int64_t b = pop().i; return BinaryOperands(pop().i, b); } int64_t nonzero(int64_t v) { if (!v) { set_error(); return 1; } return v; } int64_t pop_a() { return pop().i; } void push(int64_t i) { stack.push_back(Value(i)); } template T fetch() { if (pc + sizeof(T) > bytecode.size()) { set_error(); return T(-1); } T v = 0; for (size_t i = 0; i < sizeof(T); ++i) { v = (v << 8) | bytecode[pc + i]; } pc += sizeof(T); return v; } template void load(Task* t) { uint64_t addr = pop().i; if (error) { // Don't do unnecessary syscalls if we're already in an error state. 
return; } bool ok = true; T v = t->read_mem(remote_ptr(addr), &ok); if (!ok) { set_error(); return; } push(v); } void pick(size_t offset) { if (offset >= stack.size()) { set_error(); return; } push(stack[stack.size() - 1 - offset].i); } void step(Task* t) { assert(!error); BinaryOperands operands; switch (fetch()) { case OP_add: operands = pop_a_b(); return push(operands.a + operands.b); case OP_sub: operands = pop_a_b(); return push(operands.a - operands.b); case OP_mul: operands = pop_a_b(); return push(operands.a * operands.b); case OP_div_signed: operands = pop_a_b(); return push(operands.a / nonzero(operands.b)); case OP_div_unsigned: operands = pop_a_b(); return push(uint64_t(operands.a) / uint64_t(nonzero(operands.b))); case OP_rem_signed: operands = pop_a_b(); return push(operands.a % nonzero(operands.b)); case OP_rem_unsigned: operands = pop_a_b(); return push(uint64_t(operands.a) % uint64_t(nonzero(operands.b))); case OP_lsh: operands = pop_a_b(); return push(operands.a << operands.b); case OP_rsh_signed: operands = pop_a_b(); return push(operands.a >> operands.b); case OP_rsh_unsigned: operands = pop_a_b(); return push(uint64_t(operands.a) >> operands.b); case OP_log_not: return push(!pop_a()); case OP_bit_and: operands = pop_a_b(); return push(operands.a & operands.b); case OP_bit_or: operands = pop_a_b(); return push(operands.a | operands.b); case OP_bit_xor: operands = pop_a_b(); return push(operands.a ^ operands.b); case OP_bit_not: return push(~pop_a()); case OP_equal: operands = pop_a_b(); return push(operands.a == operands.b); case OP_less_signed: operands = pop_a_b(); return push(operands.a < operands.b); case OP_less_unsigned: operands = pop_a_b(); return push(uint64_t(operands.a) < uint64_t(operands.b)); case OP_ext: { int64_t n = nonzero(fetch()); if (n >= 64) { return; } int64_t a = pop_a(); int64_t n_mask = (int64_t(1) << n) - 1; int sign_bit = (a >> (n - 1)) & 1; return push((sign_bit * ~n_mask) | (a & n_mask)); } case OP_zero_ext: { int64_t n = fetch(); if (n >= 64) { return; } int64_t a = pop_a(); int64_t n_mask = (int64_t(1) << n) - 1; return push(a & n_mask); } case OP_ref8: return load(t); case OP_ref16: return load(t); case OP_ref32: return load(t); case OP_ref64: return load(t); case OP_dup: return pick(0); case OP_swap: operands = pop_a_b(); push(operands.b); return push(operands.a); case OP_pop: pop_a(); return; case OP_pick: return pick(fetch()); case OP_rot: { int64_t c = pop_a(); int64_t b = pop_a(); int64_t a = pop_a(); push(c); push(b); return push(a); } case OP_if_goto: { uint16_t offset = fetch(); if (pop_a()) { pc = offset; } return; } case OP_goto: pc = fetch(); return; case OP_const8: return push(fetch()); case OP_const16: return push(fetch()); case OP_const32: return push(fetch()); case OP_const64: return push(fetch()); case OP_reg: { GdbRegisterValue v = GdbServer::get_reg(t->regs(), t->extra_regs(), GdbRegister(fetch())); if (!v.defined) { set_error(); return; } switch (v.size) { case 1: return push(v.value1); case 2: return push(v.value2); case 4: return push(v.value4); case 8: return push(v.value8); } set_error(); return; } case OP_end: end = true; return; default: set_error(); return; } } const vector& bytecode; vector stack; size_t pc; bool error; bool end; }; #ifdef WORKAROUND_GDB_BUGS /* https://sourceware.org/bugzilla/show_bug.cgi?id=18617 means that * gdb generates incorrect operands for OP_ext and OP_zero_ext. 
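 * (E.g. for a value that should be sign-extended from 16 bits, gdb may
 * emit "ext" with a larger width operand, so the extension happens from
 * the wrong bit; hence the 8/16/32-bit variants generated below.)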
 * We work around this bug by generating all the alternative programs that
 * gdb perhaps should have generated, and evaluating all of them. If they
 * agree on the result, we return that as the correct result; otherwise we
 * return failure. */
static int count_variants(int bits) {
  int result = 1;
  if (bits > 8) {
    ++result;
  }
  if (bits > 16) {
    ++result;
  }
  if (bits > 32) {
    ++result;
  }
  return result;
}

template <typename T>
static T fetch(const uint8_t* data, size_t size, size_t pc) {
  if (pc + sizeof(T) > size) {
    return T(-1);
  }
  T v = 0;
  for (size_t i = 0; i < sizeof(T); ++i) {
    v = (v << 8) | data[pc + i];
  }
  return v;
}

GdbExpression::GdbExpression(const uint8_t* data, size_t size) {
  vector<bool> instruction_starts;
  instruction_starts.resize(size);
  fill(instruction_starts.begin(), instruction_starts.end(), false);
  int64_t num_variants = 1;

  vector<size_t> unvisited;
  unvisited.push_back(0);
  while (!unvisited.empty()) {
    size_t pc = unvisited.back();
    unvisited.pop_back();
    if (pc >= instruction_starts.size() || instruction_starts[pc]) {
      continue;
    }
    instruction_starts[pc] = true;
    switch (data[pc]) {
      case OP_ext:
      case OP_zero_ext:
        if (pc + 1 < size) {
          num_variants *= count_variants(data[pc + 1]);
          if (num_variants > 64) {
            // Too many variants, giving up on this expression
            return;
          }
        }
        unvisited.push_back(pc + 2);
        break;
      case OP_pick:
      case OP_const8:
        unvisited.push_back(pc + 2);
        break;
      case OP_if_goto:
        unvisited.push_back(fetch<uint16_t>(data, size, pc + 1));
        unvisited.push_back(pc + 3);
        break;
      case OP_goto:
        unvisited.push_back(fetch<uint16_t>(data, size, pc + 1));
        break;
      case OP_const16:
      case OP_reg:
        unvisited.push_back(pc + 3);
        break;
      case OP_const32:
        unvisited.push_back(pc + 5);
        break;
      case OP_const64:
        unvisited.push_back(pc + 9);
        break;
      case OP_end:
        break;
      default:
        unvisited.push_back(pc + 1);
        break;
    }
  }

  bytecode_variants.push_back(vector<uint8_t>(data, data + size));
  for (size_t i = 0; i < size; ++i) {
    if (!instruction_starts[i]) {
      continue;
    }
    if ((data[i] == OP_ext || data[i] == OP_zero_ext) && i + 1 < size) {
      uint8_t bits = data[i + 1];
      vector<vector<uint8_t> > variants;
      for (auto& b : bytecode_variants) {
        // gdb perhaps should have used a smaller type width here --- 8, 16
        // or 32 bits.
        if (bits > 8) {
          vector<uint8_t> v = b;
          v[i + 1] = 8;
          variants.push_back(move(v));
        }
        if (bits > 16) {
          vector<uint8_t> v = b;
          v[i + 1] = 16;
          variants.push_back(move(v));
        }
        if (bits > 32) {
          vector<uint8_t> v = b;
          v[i + 1] = 32;
          variants.push_back(move(v));
        }
        variants.push_back(move(b));
      }
      bytecode_variants = move(variants);
    }
  }
}
#else
GdbExpression::GdbExpression(const uint8_t* data, size_t size) {
  bytecode_variants.push_back(vector<uint8_t>(data, data + size));
}
#endif

bool GdbExpression::evaluate(Task* t, Value* result) const {
  if (bytecode_variants.empty()) {
    return false;
  }
  bool first = true;
  for (auto& b : bytecode_variants) {
    ExpressionState state(b);
    for (int steps = 0; !state.end; ++steps) {
      if (steps >= 10000 || state.error) {
        return false;
      }
      state.step(t);
    }
    Value v = state.pop();
    if (state.error) {
      return false;
    }
    if (first) {
      *result = v;
      first = false;
    } else if (*result != v) {
      return false;
    }
  }
  return true;
}
rr-4.1.0/src/GdbExpression.h000066400000000000000000000021451265436462100156660ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#ifndef RR_GDB_EXPRESSION_H_
#define RR_GDB_EXPRESSION_H_

#include <stddef.h>
#include <stdint.h>

#include <vector>

class Task;

/**
 * gdb has a simple bytecode language for writing expressions to be evaluated
 * in a remote target. This class implements evaluation of such expressions.
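 * For example, the five-byte program { 0x22, 0x02, 0x28, 0x02, 0x27 }
 * (const8 2; dup; add; end) pushes 2, duplicates it, adds, and stops,
 * so evaluate() below would yield the Value 4.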
* See https://sourceware.org/gdb/current/onlinedocs/gdb/Agent-Expressions.html */ class GdbExpression { public: GdbExpression(const uint8_t* data, size_t size); struct Value { Value(int64_t i = 0) : i(i) {} bool operator==(const Value& v) { return i == v.i; } bool operator!=(const Value& v) { return !(*this == v); } int64_t i; }; /** * If evaluation succeeds, store the final result in *result and return true. * Otherwise return false. */ bool evaluate(Task* t, Value* result) const; private: /** * To work around gdb bugs, we may generate and evaluate multiple versions of * the same expression program. */ std::vector > bytecode_variants; }; #endif // RR_GDB_EXPRESSION_H_ rr-4.1.0/src/GdbInitCommand.cc000066400000000000000000000011451265436462100160660ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "Command.h" #include "GdbServer.h" #include "main.h" using namespace std; class GdbInitCommand : public Command { public: virtual int run(vector& args); protected: GdbInitCommand(const char* name, const char* help) : Command(name, help) {} static GdbInitCommand singleton; }; GdbInitCommand GdbInitCommand::singleton("gdbinit", " rr gdbinit\n"); int GdbInitCommand::run(vector& args) { while (parse_global_option(args)) { } fputs(GdbServer::init_script().c_str(), stdout); return 0; } rr-4.1.0/src/GdbRegister.h000066400000000000000000000055631265436462100153220ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_GDB_REGISTER_H_ #define RR_GDB_REGISTER_H_ /** * This is the register numbering used by GDB. */ enum GdbRegister { DREG_EAX, DREG_ECX, DREG_EDX, DREG_EBX, DREG_ESP, DREG_EBP, DREG_ESI, DREG_EDI, DREG_EIP, DREG_EFLAGS, DREG_CS, DREG_SS, DREG_DS, DREG_ES, DREG_FS, DREG_GS, DREG_FIRST_FXSAVE_REG, DREG_ST0 = DREG_FIRST_FXSAVE_REG, DREG_ST1, DREG_ST2, DREG_ST3, DREG_ST4, DREG_ST5, DREG_ST6, DREG_ST7, // These are the names GDB gives the registers. DREG_FCTRL, DREG_FSTAT, DREG_FTAG, DREG_FISEG, DREG_FIOFF, DREG_FOSEG, DREG_FOOFF, DREG_FOP, DREG_XMM0, DREG_XMM1, DREG_XMM2, DREG_XMM3, DREG_XMM4, DREG_XMM5, DREG_XMM6, DREG_XMM7, DREG_MXCSR, // XXX the last fxsave reg on *x86* DREG_LAST_FXSAVE_REG = DREG_MXCSR, DREG_ORIG_EAX, DREG_NUM_LINUX_I386, DREG_YMM0H, DREG_YMM1H, DREG_YMM2H, DREG_YMM3H, DREG_YMM4H, DREG_YMM5H, DREG_YMM6H, DREG_YMM7H, // Last register we can find in user_regs_struct // (except for orig_eax). DREG_NUM_USER_REGS = DREG_GS + 1, // x86-64 register numbers DREG_RAX = 0, DREG_RBX, DREG_RCX, DREG_RDX, DREG_RSI, DREG_RDI, DREG_RBP, DREG_RSP, DREG_R8, DREG_R9, DREG_R10, DREG_R11, DREG_R12, DREG_R13, DREG_R14, DREG_R15, DREG_RIP, // Things get a little tricky here, because x86-64 has some registers // named identically to its x86 counterpart, but we've used the names // in the x86 register definitions above, and the numbers they need // to represent are different. Hence the unique names here. DREG_64_EFLAGS, DREG_64_CS, DREG_64_SS, DREG_64_DS, DREG_64_ES, DREG_64_FS, DREG_64_GS, DREG_64_FIRST_FXSAVE_REG, DREG_64_ST0 = DREG_64_FIRST_FXSAVE_REG, DREG_64_ST1, DREG_64_ST2, DREG_64_ST3, DREG_64_ST4, DREG_64_ST5, DREG_64_ST6, DREG_64_ST7, // These are the names GDB gives the registers. 
DREG_64_FCTRL, DREG_64_FSTAT, DREG_64_FTAG, DREG_64_FISEG, DREG_64_FIOFF, DREG_64_FOSEG, DREG_64_FOOFF, DREG_64_FOP, DREG_64_XMM0, DREG_64_XMM1, DREG_64_XMM2, DREG_64_XMM3, DREG_64_XMM4, DREG_64_XMM5, DREG_64_XMM6, DREG_64_XMM7, DREG_64_XMM8, DREG_64_XMM9, DREG_64_XMM10, DREG_64_XMM11, DREG_64_XMM12, DREG_64_XMM13, DREG_64_XMM14, DREG_64_XMM15, DREG_64_MXCSR, DREG_64_LAST_FXSAVE_REG = DREG_64_MXCSR, DREG_ORIG_RAX, DREG_NUM_LINUX_X86_64, DREG_64_YMM0H, DREG_64_YMM1H, DREG_64_YMM2H, DREG_64_YMM3H, DREG_64_YMM4H, DREG_64_YMM5H, DREG_64_YMM6H, DREG_64_YMM7H, DREG_64_YMM8H, DREG_64_YMM9H, DREG_64_YMM10H, DREG_64_YMM11H, DREG_64_YMM12H, DREG_64_YMM13H, DREG_64_YMM14H, DREG_64_YMM15H, // Last register we can find in user_regs_struct (except for orig_rax). DREG_64_NUM_USER_REGS = DREG_64_GS + 1, }; #endif /* RR_GDB_REGISTER_H_ */ rr-4.1.0/src/GdbServer.cc000066400000000000000000001256311265436462100151410ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "GdbServer" #include "GdbServer.h" #include #include #include #include #include #include #include #include #include #include #include "BreakpointCondition.h" #include "GdbCommandHandler.h" #include "GdbExpression.h" #include "kernel_metadata.h" #include "log.h" #include "ReplaySession.h" #include "ScopedFd.h" #include "task.h" #include "util.h" using namespace rr; using namespace std; /** * 32-bit writes to DBG_COMMAND_MAGIC_ADDRESS by the debugger trigger * rr commands. */ static const uint32_t DBG_COMMAND_MAGIC_ADDRESS = 29298; // 'rr' /** * The high-order byte of the 32-bit value indicates the specific command * message. Not-understood command messages are ignored. */ static const uint32_t DBG_COMMAND_MSG_MASK = 0xFF000000; /** * Create a checkpoint of the current state whose index is given by the * command parameter. If there is already a checkpoint with that index, it * is deleted first. */ static const int DBG_COMMAND_MSG_CREATE_CHECKPOINT = 0x01000000; /** * Delete the checkpoint of the current state whose index is given by the * command parameter. */ static const int DBG_COMMAND_MSG_DELETE_CHECKPOINT = 0x02000000; static const uint32_t DBG_COMMAND_PARAMETER_MASK = 0x00FFFFFF; // Special-sauce macros defined by rr when launching the gdb client, // which implement functionality outside of the gdb remote protocol. // (Don't stare at them too long or you'll go blind ;).) static const string& gdb_rr_macros() { static string s; if (s.empty()) { stringstream ss; ss // TODO define `document' sections for these << "define checkpoint\n" << " init-if-undefined $_next_checkpoint_index = 1\n" /* Ensure the command echoes the checkpoint number, not the encoded * message */ << " p (*(int*)" << DBG_COMMAND_MAGIC_ADDRESS << " = " << DBG_COMMAND_MSG_CREATE_CHECKPOINT << " | $_next_checkpoint_index), " << "$_next_checkpoint_index++\n" << "end\n" << "define delete checkpoint\n" << " p (*(int*)" << DBG_COMMAND_MAGIC_ADDRESS << " = " << DBG_COMMAND_MSG_DELETE_CHECKPOINT << " | $arg0), $arg0\n" << "end\n" << "define restart\n" << " run c$arg0\n" << "end\n" // In gdb version "Fedora 7.8.1-30.fc21", a raw "run" command // issued before any user-generated resume-execution command // results in gdb hanging just after the inferior hits an internal // gdb breakpoint. This happens outside of rr, with gdb // controlling gdbserver, as well. We work around that by // ensuring *some* resume-execution command has been issued before // restarting the session. 
But, only if the inferior hasn't // already finished execution ($_thread != 0). If it has and we // issue the "stepi" command, then gdb refuses to restart // execution. << "define hook-run\n" << " if $_thread != 0 && !$suppress_run_hook\n" << " stepi\n" << " end\n" << "end\n" << "define hookpost-continue\n" << " set $suppress_run_hook = 1\n" << "end\n" << "define hookpost-step\n" << " set $suppress_run_hook = 1\n" << "end\n" << "define hookpost-stepi\n" << " set $suppress_run_hook = 1\n" << "end\n" << "define hookpost-next\n" << " set $suppress_run_hook = 1\n" << "end\n" << "define hookpost-nexti\n" << " set $suppress_run_hook = 1\n" << "end\n" << "define hookpost-finish\n" << " set $suppress_run_hook = 1\n" << "end\n" << "define hookpost-reverse-continue\n" << " set $suppress_run_hook = 1\n" << "end\n" << "define hookpost-reverse-step\n" << " set $suppress_run_hook = 1\n" << "end\n" << "define hookpost-reverse-stepi\n" << " set $suppress_run_hook = 1\n" << "end\n" << "define hookpost-reverse-finish\n" << " set $suppress_run_hook = 1\n" << "end\n" << "define hookpost-run\n" << " set $suppress_run_hook = 0\n" << "end\n" << "handle SIGURG stop\n" << "set prompt (rr) \n" << GdbCommandHandler::gdb_macros() // Try both "set target-async" and "maint set target-async" since // that changed recently. << "set target-async 0\n" << "maint set target-async 0\n"; s = ss.str(); } return s; } /** * Attempt to find the value of |regname| (a DebuggerRegister * name), and if so (i) write it to |buf|; (ii) * set |*defined = true|; (iii) return the size of written * data. If |*defined == false|, the value of |buf| is * meaningless. * * This helper can fetch the values of both general-purpose * and "extra" registers. * * NB: |buf| must be large enough to hold the largest register * value that can be named by |regname|. */ static size_t get_reg(const Registers& regs, const ExtraRegisters& extra_regs, uint8_t* buf, GdbRegister regname, bool* defined) { size_t num_bytes = regs.read_register(buf, regname, defined); if (!*defined) { num_bytes = extra_regs.read_register(buf, regname, defined); } return num_bytes; } /** * Return the register |which|, which may not have a defined value. */ GdbRegisterValue GdbServer::get_reg(const Registers& regs, const ExtraRegisters& extra_regs, GdbRegister which) { GdbRegisterValue reg; memset(®, 0, sizeof(reg)); reg.name = which; reg.size = ::get_reg(regs, extra_regs, ®.value[0], which, ®.defined); return reg; } static GdbThreadId get_threadid(const Session& session, const TaskUid& tuid) { Task* t = session.find_task(tuid); pid_t pid = t ? t->tgid() : GdbThreadId::ANY.pid; return GdbThreadId(pid, tuid.tid()); } static GdbThreadId get_threadid(Task* t) { return GdbThreadId(t->tgid(), t->rec_tid); } static bool matches_threadid(Task* t, const GdbThreadId& target) { return (target.pid <= 0 || target.pid == t->tgid()) && (target.tid <= 0 || target.tid == t->rec_tid); } static WatchType watchpoint_type(GdbRequestType req) { switch (req) { case DREQ_SET_HW_BREAK: case DREQ_REMOVE_HW_BREAK: return WATCH_EXEC; case DREQ_SET_WR_WATCH: case DREQ_REMOVE_WR_WATCH: return WATCH_WRITE; case DREQ_REMOVE_RDWR_WATCH: case DREQ_SET_RDWR_WATCH: // NB: x86 doesn't support read-only watchpoints (who would // ever want to use one?) so we treat them as readwrite // watchpoints and hope that gdb can figure out what's going // on. That is, if a user ever tries to set a read // watchpoint. 
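// (Hardware background for the x86 case handled here: the debug-register
// R/W bit pairs can encode instruction execution, data writes, or data
// reads-and-writes, but not data reads alone, which is why the RD
// requests below fall through to WATCH_READWRITE.)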
case DREQ_REMOVE_RD_WATCH: case DREQ_SET_RD_WATCH: return WATCH_READWRITE; default: FATAL() << "Unknown dbg request " << req; return WatchType(-1); // not reached } } static void maybe_singlestep_for_event(Task* t, GdbRequest* req) { if (trace_instructions_up_to_event( t->replay_session().current_trace_frame().time())) { fputs("Stepping: ", stderr); t->regs().print_register_file_compact(stderr); fprintf(stderr, " ticks:%" PRId64 "\n", t->tick_count()); *req = GdbRequest(DREQ_CONT); req->suppress_debugger_stop = true; req->cont().actions.push_back(GdbContAction( ACTION_STEP, get_threadid(t->replay_session(), t->tuid()))); } } bool GdbServer::maybe_process_magic_command(const GdbRequest& req) { if (!(req.mem().addr == DBG_COMMAND_MAGIC_ADDRESS && req.mem().len == 4)) { return false; } uint32_t cmd; memcpy(&cmd, req.mem().data.data(), sizeof(cmd)); uintptr_t param = cmd & DBG_COMMAND_PARAMETER_MASK; switch (cmd & DBG_COMMAND_MSG_MASK) { case DBG_COMMAND_MSG_CREATE_CHECKPOINT: { if (timeline.can_add_checkpoint()) { checkpoints[param] = Checkpoint(timeline, last_continue_tuid, Checkpoint::EXPLICIT); } break; } case DBG_COMMAND_MSG_DELETE_CHECKPOINT: { auto it = checkpoints.find(param); if (it != checkpoints.end()) { timeline.remove_explicit_checkpoint(it->second.mark); checkpoints.erase(it); } break; } default: return false; } dbg->reply_set_mem(true); return true; } void GdbServer::dispatch_regs_request(const Registers& regs, const ExtraRegisters& extra_regs) { size_t n_regs = regs.total_registers(); GdbRegisterFile file(n_regs); for (size_t i = 0; i < n_regs; ++i) { file.regs[i] = get_reg(regs, extra_regs, GdbRegister(i)); } dbg->reply_get_regs(file); } class GdbBreakpointCondition : public BreakpointCondition { public: GdbBreakpointCondition(const vector >& bytecodes) { for (auto& b : bytecodes) { expressions.push_back(GdbExpression(b.data(), b.size())); } } virtual bool evaluate(Task* t) const { for (auto& e : expressions) { GdbExpression::Value v; // Break if evaluation fails or the result is nonzero if (!e.evaluate(t, &v) || v.i != 0) { return true; } } return false; } private: vector expressions; }; static unique_ptr breakpoint_condition( const GdbRequest& request) { if (request.watch().conditions.empty()) { return nullptr; } return unique_ptr( new GdbBreakpointCondition(request.watch().conditions)); } static bool search_memory(Task* t, const MemoryRange& where, const vector& find, remote_ptr* result) { vector buf; buf.resize(page_size() + find.size() - 1); for (const auto& m : t->vm()->maps()) { MemoryRange r = MemoryRange(m.map.start(), m.map.end() + find.size() - 1) .intersect(where); // We basically read page by page here, but we read past the end of the // page to handle the case where a found string crosses page boundaries. // This approach isn't great for handling long search strings but gdb's find // command isn't really suited to that. // Reading page by page lets us avoid problems where some pages in a // mapping aren't readable (e.g. reading beyond end of file). 
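// A worked example of the windowing, assuming 4096-byte pages and a
// 7-byte search string: |buf| holds 4096 + 7 - 1 = 4102 bytes, so a read
// starting at page N also covers the first 6 bytes of page N+1, and the
// next iteration advances |r| to start exactly at page N+1. A match that
// straddles the page boundary is therefore fully visible to one of the
// memmem() calls.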
while (r.size() >= find.size()) { ssize_t nread = t->read_bytes_fallible( r.start(), std::min(buf.size(), r.size()), buf.data()); if (nread >= ssize_t(find.size())) { void* found = memmem(buf.data(), nread, find.data(), find.size()); if (found) { *result = r.start() + (static_cast<uint8_t*>(found) - buf.data()); return true; } } r = MemoryRange( std::min(r.end(), floor_page_size(r.start()) + page_size()), r.end()); } } return false; } void GdbServer::dispatch_debugger_request(Session& session, const GdbRequest& req, ReportState state) { assert(!req.is_resume_request()); // These requests don't require a target task. switch (req.type) { case DREQ_RESTART: assert(false); return; // unreached case DREQ_GET_CURRENT_THREAD: dbg->reply_get_current_thread(get_threadid(session, last_continue_tuid)); return; case DREQ_GET_OFFSETS: /* TODO */ dbg->reply_get_offsets(); return; case DREQ_GET_THREAD_LIST: { vector<GdbThreadId> tids; if (state != REPORT_THREADS_DEAD) { for (auto& kv : session.tasks()) { tids.push_back(get_threadid(session, kv.second->tuid())); } } dbg->reply_get_thread_list(tids); return; } case DREQ_INTERRUPT: { Task* t = session.find_task(last_continue_tuid); ASSERT(t, session.is_diversion()) << "Replay interrupts should be handled at a higher level"; assert(!t || t->task_group()->tguid() == debuggee_tguid); dbg->notify_stop(t ? get_threadid(t) : GdbThreadId(), 0); stop_reason = 0; if (t) { last_query_tuid = last_continue_tuid = t->tuid(); } return; } default: /* fall through to next switch stmt */ break; } bool is_query = req.type != DREQ_SET_CONTINUE_THREAD; Task* target = req.target.tid > 0 ? session.find_task(req.target.tid) : session.find_task(is_query ? last_query_tuid : last_continue_tuid); if (target) { if (is_query) { last_query_tuid = target->tuid(); } else { last_continue_tuid = target->tuid(); } } // These requests query or manipulate which task is the // target, so it's OK if the task doesn't exist. switch (req.type) { case DREQ_GET_IS_THREAD_ALIVE: dbg->reply_get_is_thread_alive(target != nullptr); return; case DREQ_GET_THREAD_EXTRA_INFO: dbg->reply_get_thread_extra_info(target->name()); return; case DREQ_SET_CONTINUE_THREAD: dbg->reply_select_thread(target != nullptr); return; case DREQ_SET_QUERY_THREAD: dbg->reply_select_thread(target != nullptr); return; default: // fall through to next switch stmt break; } // These requests require a valid target task. We don't trust // the debugger to use the information provided above to only // query valid tasks. if (!target) { dbg->notify_no_such_thread(req); return; } switch (req.type) { case DREQ_GET_AUXV: { dbg->reply_get_auxv(target->vm()->saved_auxv()); return; } case DREQ_GET_MEM: { vector<uint8_t> mem; mem.resize(req.mem().len); ssize_t nread = target->read_bytes_fallible(req.mem().addr, req.mem().len, mem.data()); mem.resize(max(ssize_t(0), nread)); target->vm()->replace_breakpoints_with_original_values( mem.data(), mem.size(), req.mem().addr); dbg->reply_get_mem(mem); return; } case DREQ_SET_MEM: { // gdb has been observed to send requests of length 0 at // odd times // (e.g. before sending the magic write to create a checkpoint) if (req.mem().len == 0) { dbg->reply_set_mem(true); return; } if (maybe_process_magic_command(req)) { return; } // We only allow the debugger to write memory if the // memory will be written to a diversion session. // Arbitrary writes to replay sessions cause // divergence.
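// A worked example of the magic-write encoding checked above: the
// "checkpoint" macro from gdb_rr_macros() stores
// DBG_COMMAND_MSG_CREATE_CHECKPOINT | index (e.g. 0x01000003 for index 3)
// into DBG_COMMAND_MAGIC_ADDRESS, and maybe_process_magic_command()
// splits the value back apart using DBG_COMMAND_MSG_MASK and
// DBG_COMMAND_PARAMETER_MASK.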
if (!session.is_diversion()) { LOG(error) << "Attempt to write memory outside diversion session"; dbg->reply_set_mem(false); return; } LOG(debug) << "Writing " << req.mem().len << " bytes to " << HEX(req.mem().addr); // TODO fallible target->write_bytes_helper(req.mem().addr, req.mem().len, req.mem().data.data()); dbg->reply_set_mem(true); return; } case DREQ_SEARCH_MEM: { remote_ptr addr; bool found = search_memory(target, MemoryRange(req.mem().addr, req.mem().len), req.mem().data, &addr); dbg->reply_search_mem(found, addr); return; } case DREQ_GET_REG: { GdbRegisterValue reg = get_reg(target->regs(), target->extra_regs(), req.reg().name); dbg->reply_get_reg(reg); return; } case DREQ_GET_REGS: { dispatch_regs_request(target->regs(), target->extra_regs()); return; } case DREQ_SET_REG: { if (!session.is_diversion()) { // gdb sets orig_eax to -1 during a restart. For a // replay session this is not correct (we might be // restarting from an rr checkpoint inside a system // call, and we must not tamper with replay state), so // just ignore it. if ((target->arch() == x86 && req.reg().name == DREG_ORIG_EAX) || (target->arch() == x86_64 && req.reg().name == DREG_ORIG_RAX)) { dbg->reply_set_reg(true); return; } LOG(error) << "Attempt to write register outside diversion session"; dbg->reply_set_reg(false); return; } if (req.reg().defined) { Registers regs = target->regs(); regs.write_register(req.reg().name, req.reg().value, req.reg().size); target->set_regs(regs); } dbg->reply_set_reg(true /*currently infallible*/); return; } case DREQ_GET_STOP_REASON: { dbg->reply_get_stop_reason(get_threadid(session, last_continue_tuid), stop_reason); return; } case DREQ_SET_SW_BREAK: { ASSERT(target, req.watch().kind == sizeof(AddressSpace::breakpoint_insn)) << "Debugger setting bad breakpoint insn"; // Mirror all breakpoint/watchpoint sets/unsets to the target process // if it's not part of the timeline (i.e. it's a diversion). 
Task* replay_task = timeline.current_session().find_task(target->tuid()); bool ok = timeline.add_breakpoint(replay_task, req.watch().addr, breakpoint_condition(req)); if (ok && &session != &timeline.current_session()) { bool diversion_ok = target->vm()->add_breakpoint(req.watch().addr, TRAP_BKPT_USER); ASSERT(target, diversion_ok); } dbg->reply_watchpoint_request(ok); return; } case DREQ_SET_HW_BREAK: case DREQ_SET_RD_WATCH: case DREQ_SET_WR_WATCH: case DREQ_SET_RDWR_WATCH: { Task* replay_task = timeline.current_session().find_task(target->tuid()); bool ok = timeline.add_watchpoint( replay_task, req.watch().addr, req.watch().kind, watchpoint_type(req.type), breakpoint_condition(req)); if (ok && &session != &timeline.current_session()) { bool diversion_ok = target->vm()->add_watchpoint( req.watch().addr, req.watch().kind, watchpoint_type(req.type)); ASSERT(target, diversion_ok); } dbg->reply_watchpoint_request(ok); return; } case DREQ_REMOVE_SW_BREAK: { Task* replay_task = timeline.current_session().find_task(target->tuid()); timeline.remove_breakpoint(replay_task, req.watch().addr); if (&session != &timeline.current_session()) { target->vm()->remove_breakpoint(req.watch().addr, TRAP_BKPT_USER); } dbg->reply_watchpoint_request(true); return; } case DREQ_REMOVE_HW_BREAK: case DREQ_REMOVE_RD_WATCH: case DREQ_REMOVE_WR_WATCH: case DREQ_REMOVE_RDWR_WATCH: { Task* replay_task = timeline.current_session().find_task(target->tuid()); timeline.remove_watchpoint(replay_task, req.watch().addr, req.watch().kind, watchpoint_type(req.type)); if (&session != &timeline.current_session()) { target->vm()->remove_watchpoint(req.watch().addr, req.watch().kind, watchpoint_type(req.type)); } dbg->reply_watchpoint_request(true); return; } case DREQ_READ_SIGINFO: LOG(warn) << "READ_SIGINFO request outside of diversion session"; dbg->reply_read_siginfo(vector()); return; case DREQ_WRITE_SIGINFO: LOG(warn) << "WRITE_SIGINFO request outside of diversion session"; dbg->reply_write_siginfo(); return; case DREQ_RR_CMD: dbg->reply_rr_cmd( GdbCommandHandler::process_command(*this, target, req.text())); return; default: FATAL() << "Unknown debugger request " << req.type; } } bool GdbServer::diverter_process_debugger_requests( DiversionSession& diversion_session, uint32_t& diversion_refcount, GdbRequest* req) { while (true) { *req = dbg->get_request(); if (req->is_resume_request()) { return diversion_refcount > 0; } switch (req->type) { case DREQ_RESTART: case DREQ_DETACH: diversion_refcount = 0; return false; case DREQ_READ_SIGINFO: { LOG(debug) << "Adding ref to diversion session"; ++diversion_refcount; // TODO: maybe share with replayer.cc? vector si_bytes; si_bytes.resize(req->mem().len); memset(si_bytes.data(), 0, si_bytes.size()); dbg->reply_read_siginfo(si_bytes); continue; } case DREQ_SET_QUERY_THREAD: { if (req->target.tid) { Task* next = diversion_session.find_task(req->target.tid); if (next) { last_query_tuid = next->tuid(); } } break; } case DREQ_WRITE_SIGINFO: LOG(debug) << "Removing reference to diversion session ..."; assert(diversion_refcount > 0); --diversion_refcount; if (diversion_refcount == 0) { LOG(debug) << " ... 
dying at next continue request"; } dbg->reply_write_siginfo(); continue; default: break; } dispatch_debugger_request(diversion_session, *req, REPORT_NORMAL); } } static bool is_last_thread_exit(const BreakStatus& break_status) { return break_status.task_exit && break_status.task->task_group()->task_set().size() == 1; } static Task* is_in_exec(ReplayTimeline& timeline) { Task* t = timeline.current_session().current_task(); return t && timeline.current_session().next_step_is_syscall_exit( syscall_number_for_execve(t->arch())) ? t : nullptr; } void GdbServer::maybe_notify_stop(const GdbRequest& req, const BreakStatus& break_status) { int sig = -1; remote_ptr watch_addr; if (!break_status.watchpoints_hit.empty()) { sig = SIGTRAP; watch_addr = break_status.watchpoints_hit[0].addr; } if (break_status.breakpoint_hit || break_status.singlestep_complete) { sig = SIGTRAP; } if (break_status.signal) { sig = break_status.signal; } if (is_last_thread_exit(break_status) && dbg->features().reverse_execution) { if (req.cont().run_direction == RUN_FORWARD) { // The exit of the last task in a task group generates a fake SIGKILL, // when reverse-execution is enabled, because users often want to run // backwards from the end of the task. sig = SIGKILL; } else { // The start of the debuggee task-group should trigger a silent stop. sig = 0; } } Task* t = break_status.task; Task* in_exec_task = is_in_exec(timeline); if (in_exec_task) { sig = 0; t = in_exec_task; } if (sig >= 0 && t->task_group()->tguid() == debuggee_tguid) { /* Notify the debugger and process any new requests * that might have triggered before resuming. */ dbg->notify_stop(get_threadid(t), sig, watch_addr.as_int()); stop_reason = sig; last_query_tuid = last_continue_tuid = t->tuid(); } } static RunCommand compute_run_command_from_actions(Task* t, const GdbRequest& req, int* signal_to_deliver) { for (auto& action : req.cont().actions) { if (matches_threadid(t, action.target)) { // We can only run task |t|; neither diversion nor replay sessions // support running multiple threads. So even if gdb tells us to continue // multiple threads, we don't do that. *signal_to_deliver = action.signal_to_deliver; return action.type == ACTION_STEP ? RUN_SINGLESTEP : RUN_CONTINUE; } } // gdb told us to run (or step) some thread that's not |t|, without resuming // |t|. It sometimes does this even though its target thread is entering a // blocking syscall and |t| must run before gdb's target thread can make // progress. So, allow |t| to run anyway. *signal_to_deliver = 0; return RUN_CONTINUE; } struct AllowedTasks { TaskUid task; // tid 0 means 'any member of debuggee_tguid' RunCommand command; }; static RunCommand compute_run_command_for_reverse_exec( Session& session, const TaskGroupUid& debuggee_tguid, const GdbRequest& req, vector& allowed_tasks) { // Singlestep if any of the actions request singlestepping. RunCommand result = RUN_CONTINUE; for (auto& action : req.cont().actions) { if (action.target.pid > 0 && action.target.pid != debuggee_tguid.tid()) { continue; } AllowedTasks allowed; allowed.command = RUN_CONTINUE; if (action.type == ACTION_STEP) { allowed.command = result = RUN_SINGLESTEP; } if (action.target.tid > 0) { Task* t = session.find_task(action.target.tid); if (t) { allowed.task = t->tuid(); } } allowed_tasks.push_back(allowed); } return result; } /** * Create a new diversion session using |replay| session as the * template. The |replay| session isn't mutated. 
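 * (The usual trigger is a gdb command that runs code in the inferior,
 * e.g. "call foo()" or "print foo()": gdb first issues DREQ_READ_SIGINFO,
 * which process_debugger_requests() treats as the cue to start a
 * diversion.)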
* * Execution begins in the new diversion session under the control of * |dbg| starting with initial thread target |task|. The diversion * session ends at the request of |dbg|, and |req| returns the first * request made that wasn't handled by the diversion session. That * is, the first request that should be handled by |replay| upon * resuming execution in that session. */ GdbRequest GdbServer::divert(ReplaySession& replay) { GdbRequest req; LOG(debug) << "Starting debugging diversion for " << &replay; if (timeline.is_running()) { // Ensure breakpoints and watchpoints are applied before we fork the // diversion, to ensure the diversion is consistent with the timeline // breakpoint/watchpoint state. timeline.apply_breakpoints_and_watchpoints(); } DiversionSession::shr_ptr diversion_session = replay.clone_diversion(); uint32_t diversion_refcount = 1; TaskUid saved_query_tuid = last_query_tuid; while (diverter_process_debugger_requests(*diversion_session, diversion_refcount, &req)) { assert(req.is_resume_request()); if (req.cont().run_direction == RUN_BACKWARD) { // We don't support reverse execution in a diversion. Just issue // an immediate stop. dbg->notify_stop(get_threadid(*diversion_session, last_continue_tuid), 0); stop_reason = 0; last_query_tuid = last_continue_tuid; continue; } Task* t = diversion_session->find_task(last_continue_tuid); if (!t) { diversion_refcount = 0; req = GdbRequest(DREQ_NONE); break; } int signal_to_deliver; RunCommand command = compute_run_command_from_actions(t, req, &signal_to_deliver); auto result = diversion_session->diversion_step(t, command, signal_to_deliver); if (result.status == DiversionSession::DIVERSION_EXITED) { diversion_refcount = 0; req = GdbRequest(DREQ_NONE); break; } assert(result.status == DiversionSession::DIVERSION_CONTINUE); maybe_notify_stop(req, result.break_status); } LOG(debug) << "... ending debugging diversion"; assert(diversion_refcount == 0); diversion_session->kill_all_tasks(); last_query_tuid = saved_query_tuid; return req; } /** * Reply to debugger requests until the debugger asks us to resume * execution, detach, restart, or interrupt. */ GdbRequest GdbServer::process_debugger_requests(ReportState state) { while (true) { GdbRequest req = dbg->get_request(); req.suppress_debugger_stop = false; try_lazy_reverse_singlesteps(req); if (req.type == DREQ_READ_SIGINFO) { // TODO: we send back a dummy siginfo_t to gdb // so that it thinks the request succeeded. // If we don't, then it thinks the // READ_SIGINFO failed and won't attempt to // send WRITE_SIGINFO. For |call foo()| // frames, that means we don't know when the // diversion session is ending. vector si_bytes; si_bytes.resize(req.mem().len); memset(si_bytes.data(), 0, si_bytes.size()); dbg->reply_read_siginfo(si_bytes); req = divert(timeline.current_session()); if (req.type == DREQ_NONE) { continue; } // Carry on to process the request that was rejected by // the diversion session } if (req.is_resume_request()) { Task* t = current_session().find_task(last_continue_tuid); if (t) { maybe_singlestep_for_event(t, &req); } return req; } if (req.type == DREQ_INTERRUPT) { LOG(debug) << " request to interrupt"; return req; } if (req.type == DREQ_RESTART) { // Debugger client requested that we restart execution // from the beginning. Restart our debug session. 
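// (With the macros from gdb_rr_macros(), "restart 2" runs "run c2",
// which presumably arrives here as a DREQ_RESTART naming checkpoint 2;
// a bare "run" restarts from the previous starting point.)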
LOG(debug) << " request to restart at event " << req.restart().param; return req; } if (req.type == DREQ_DETACH) { LOG(debug) << " debugger detached"; dbg->reply_detach(); return req; } dispatch_debugger_request(current_session(), req, state); } } void GdbServer::try_lazy_reverse_singlesteps(GdbRequest& req) { if (!timeline.is_running()) { return; } ReplayTimeline::Mark now; bool need_seek = false; Task* t = timeline.current_session().current_task(); while (t && req.type == DREQ_CONT && req.cont().run_direction == RUN_BACKWARD && req.cont().actions.size() == 1 && req.cont().actions[0].type == ACTION_STEP && req.cont().actions[0].signal_to_deliver == 0 && matches_threadid(t, req.cont().actions[0].target) && !req.suppress_debugger_stop) { if (!now) { now = timeline.mark(); } ReplayTimeline::Mark previous = timeline.lazy_reverse_singlestep(now, t); if (!previous) { break; } now = previous; need_seek = true; BreakStatus break_status; break_status.task = t; break_status.singlestep_complete = true; LOG(debug) << " using lazy reverse-singlestep"; maybe_notify_stop(req, break_status); while (true) { req = dbg->get_request(); req.suppress_debugger_stop = false; if (req.type != DREQ_GET_REGS) { break; } LOG(debug) << " using lazy reverse-singlestep registers"; dispatch_regs_request(now.regs(), now.extra_regs()); } } if (need_seek) { timeline.seek_to_mark(now); } } bool GdbServer::detach_or_restart(const GdbRequest& req, ContinueOrStop* s) { if (DREQ_RESTART == req.type) { restart_session(req); *s = CONTINUE_DEBUGGING; return true; } if (DREQ_DETACH == req.type) { *s = STOP_DEBUGGING; return true; } return false; } GdbServer::ContinueOrStop GdbServer::handle_exited_state( GdbRequest& last_resume_request) { // TODO return real exit code, if it's useful. dbg->notify_exit_code(0); final_event = timeline.current_session().trace_reader().time(); GdbRequest req = process_debugger_requests(REPORT_THREADS_DEAD); ContinueOrStop s; if (detach_or_restart(req, &s)) { last_resume_request = GdbRequest(); return s; } FATAL() << "Received continue/interrupt request after end-of-trace."; return STOP_DEBUGGING; } GdbServer::ContinueOrStop GdbServer::debug_one_step( GdbRequest& last_resume_request) { ReplayResult result; GdbRequest req; if (in_debuggee_end_state) { // Treat the state where the last thread is about to exit like // termination. req = process_debugger_requests(); // If it's a forward execution request, fake the exited state. if (req.is_resume_request() && req.cont().run_direction == RUN_FORWARD) { if (interrupt_pending) { // Just process this. We're getting it after a restart. } else { return handle_exited_state(last_resume_request); } } else { in_debuggee_end_state = false; } // Otherwise (e.g. detach, restart, interrupt or reverse-exec) process // the request as normal. } else if (!interrupt_pending || last_resume_request.type == DREQ_NONE) { req = process_debugger_requests(); } else { req = last_resume_request; } ContinueOrStop s; if (detach_or_restart(req, &s)) { last_resume_request = GdbRequest(); return s; } if (req.is_resume_request()) { last_resume_request = req; } else { assert(req.type == DREQ_INTERRUPT); interrupt_pending = true; req = last_resume_request; assert(req.is_resume_request()); } if (interrupt_pending) { Task* t = timeline.current_session().current_task(); if (t->task_group()->tguid() == debuggee_tguid) { interrupt_pending = false; dbg->notify_stop(get_threadid(t), in_debuggee_end_state ? 
SIGKILL : 0); stop_reason = 0; return CONTINUE_DEBUGGING; } } auto interrupt_check = [&]() { return dbg->sniff_packet(); }; if (req.cont().run_direction == RUN_FORWARD) { if (is_in_exec(timeline) && timeline.current_session().current_task()->task_group()->tguid() == debuggee_tguid) { // Don't go any further forward. maybe_notify_stop will generate a // stop. result = ReplayResult(); } else { int signal_to_deliver; RunCommand command = compute_run_command_from_actions( timeline.current_session().current_task(), req, &signal_to_deliver); // Ignore gdb's |signal_to_deliver|; we just have to follow the replay. result = timeline.replay_step_forward(command, target.event, interrupt_check); } if (result.status == REPLAY_EXITED) { return handle_exited_state(last_resume_request); } } else { vector allowed_tasks; // Convert the tids in GdbContActions into TaskUids to avoid issues // if tids get reused. RunCommand command = compute_run_command_for_reverse_exec( timeline.current_session(), debuggee_tguid, req, allowed_tasks); auto stop_filter = [&](Task* t) -> bool { if (t->task_group()->tguid() != debuggee_tguid) { return false; } // If gdb's requested actions don't allow the task to run, we still // let it run (we can't do anything else, since we're replaying), but // we won't report stops in that task. for (auto& a : allowed_tasks) { if (a.task.tid() == 0 || a.task == t->tuid()) { return true; } } return false; }; switch (command) { case RUN_CONTINUE: result = timeline.reverse_continue(stop_filter, interrupt_check); break; case RUN_SINGLESTEP: { Task* t = timeline.current_session().find_task(last_continue_tuid); assert(t); result = timeline.reverse_singlestep( last_continue_tuid, t->tick_count(), stop_filter, interrupt_check); break; } default: assert(0 && "Unknown RunCommand"); } if (result.status == REPLAY_EXITED) { return handle_exited_state(last_resume_request); } } if (!req.suppress_debugger_stop) { maybe_notify_stop(req, result.break_status); } if (req.cont().run_direction == RUN_FORWARD && is_last_thread_exit(result.break_status) && result.break_status.task->task_group()->tguid() == debuggee_tguid) { in_debuggee_end_state = true; } return CONTINUE_DEBUGGING; } bool GdbServer::at_target() { // Don't launch the debugger for the initial rr fork child. // No one ever wants that to happen. if (!timeline.current_session().can_validate()) { return false; } Task* t = timeline.current_session().current_task(); if (!t) { return false; } if (!timeline.can_add_checkpoint()) { return false; } if (stop_replaying_to_target) { return true; } // When we decide to create the debugger, we may end up // creating a checkpoint. In that case, we want the // checkpoint to retain the state it had *before* we started // replaying the next frame. Otherwise, the TraceIfstream // will be one frame ahead of its tracee tree. // // So we make the decision to create the debugger based on the // frame we're *about to* replay, without modifying the // TraceIfstream. // NB: we'll happily attach to whichever task within the // group happens to be scheduled here. We don't take // "attach to process" to mean "attach to thread-group // leader". return timeline.current_session().current_trace_frame().time() > target.event && (!target.pid || t->tgid() == target.pid) && (!target.require_exec || t->vm()->execed()) && // Ensure we're at the start of processing an event. We don't // want to attach while we're finishing an exec() since that's a // slightly confusing state for ReplayTimeline's reverse execution. 
!timeline.current_session().current_step_key().in_execution(); } /** * The trace has reached the event at which the user wanted to start debugging. * Set up the appropriate state. */ void GdbServer::activate_debugger() { TraceFrame next_frame = timeline.current_session().current_trace_frame(); TraceFrame::Time event_now = next_frame.time(); if (!stop_replaying_to_target && (target.event > 0 || target.pid)) { fprintf(stderr, "\a\n" "--------------------------------------------------\n" " ---> Reached target process %d at event %u.\n" "--------------------------------------------------\n", target.pid, event_now); } Task* t = timeline.current_session().current_task(); // Store the current tgid and event as the "execution target" // for the next replay session, if we end up restarting. This // allows us to determine if a later session has reached this // target without necessarily replaying up to this point. target.pid = t->tgid(); target.require_exec = false; target.event = event_now; last_query_tuid = last_continue_tuid = t->tuid(); // Have the "checkpoint" be the original replay // session, and then switch over to using the cloned // session. The cloned tasks will look like children // of the clonees, so this scheme prevents |pstree| // output from getting /too/ far out of whack. if (timeline.can_add_checkpoint()) { debugger_restart_checkpoint = Checkpoint(timeline, last_continue_tuid, Checkpoint::EXPLICIT); } else { debugger_restart_checkpoint = Checkpoint(timeline, last_continue_tuid, Checkpoint::NOT_EXPLICIT); } } void GdbServer::restart_session(const GdbRequest& req) { assert(req.type == DREQ_RESTART); assert(dbg); in_debuggee_end_state = false; timeline.remove_breakpoints_and_watchpoints(); Checkpoint checkpoint_to_restore; if (req.restart().type == RESTART_FROM_CHECKPOINT) { auto it = checkpoints.find(req.restart().param); if (it == checkpoints.end()) { cout << "Checkpoint " << req.restart().param_str << " not found.\n"; cout << "Valid checkpoints:"; for (auto& c : checkpoints) { cout << " " << c.first; } cout << "\n"; dbg->notify_restart_failed(); return; } checkpoint_to_restore = it->second; } else if (req.restart().type == RESTART_FROM_PREVIOUS) { checkpoint_to_restore = debugger_restart_checkpoint; } interrupt_pending = true; if (checkpoint_to_restore.mark) { timeline.seek_to_mark(checkpoint_to_restore.mark); last_query_tuid = last_continue_tuid = checkpoint_to_restore.last_continue_tuid; if (debugger_restart_checkpoint.is_explicit == Checkpoint::EXPLICIT) { timeline.remove_explicit_checkpoint(debugger_restart_checkpoint.mark); } debugger_restart_checkpoint = checkpoint_to_restore; if (timeline.can_add_checkpoint()) { timeline.add_explicit_checkpoint(); } return; } stop_replaying_to_target = false; assert(req.restart().type == RESTART_FROM_EVENT); // Note that we don't reset the target pid; we intentionally keep targeting // the same process no matter what is running when we hit the event. target.event = req.restart().param; if (final_event >= 0) { target.event = min(final_event - 1, target.event); } timeline.seek_to_before_event(target.event); do { ReplayResult result = timeline.replay_step_forward(RUN_CONTINUE, target.event); // We should never reach the end of the trace without hitting the stop // condition below. assert(result.status != REPLAY_EXITED); if (is_last_thread_exit(result.break_status) && result.break_status.task->task_group()->tgid == target.pid) { // Debuggee task is about to exit. Stop here. 
in_debuggee_end_state = true; break; } } while (!at_target()); activate_debugger(); } void GdbServer::serve_replay(const ConnectionFlags& flags) { do { ReplayResult result = timeline.replay_step_forward(RUN_CONTINUE, target.event); if (result.status == REPLAY_EXITED) { LOG(info) << "Debugger was not launched before end of trace"; return; } } while (!at_target()); unsigned short port = flags.dbg_port > 0 ? flags.dbg_port : getpid(); // Don't probe if the user specified a port. Explicitly // selecting a port is usually done by scripts, which would // presumably break if a different port were to be selected by // rr (otherwise why would they specify a port in the first // place). So fail with a clearer error message. auto probe = flags.dbg_port > 0 ? GdbConnection::DONT_PROBE : GdbConnection::PROBE_PORT; Task* t = timeline.current_session().current_task(); dbg = GdbConnection::await_client_connection( port, probe, t->tgid(), t->vm()->exe_image(), GdbConnection::Features(), flags.debugger_params_write_pipe); if (flags.debugger_params_write_pipe) { flags.debugger_params_write_pipe->close(); } debuggee_tguid = t->task_group()->tguid(); TraceFrame::Time first_run_event = t->vm()->first_run_event(); if (first_run_event) { timeline.set_reverse_execution_barrier_event(first_run_event); } activate_debugger(); GdbRequest last_resume_request; while (debug_one_step(last_resume_request) == CONTINUE_DEBUGGING) { } LOG(debug) << "debugger server exiting ..."; } void GdbServer::launch_gdb(ScopedFd& params_pipe_fd, const string& gdb_command_file_path, const string& gdb_binary_file_path) { GdbConnection::launch_gdb(params_pipe_fd, gdb_rr_macros(), gdb_command_file_path, gdb_binary_file_path); } void GdbServer::emergency_debug(Task* t) { // See the comment in |guard_overshoot()| explaining why we do // this. Unlike in that context though, we don't know if |t| // overshot an internal breakpoint. If it did, cover that // breakpoint up. if (t->vm()) { t->vm()->remove_all_breakpoints(); } // Don't launch a debugger on fatal errors; the user is most // likely already in a debugger, and wouldn't be able to // control another session. Instead, launch a new GdbServer and wait for // the user to connect from another window. 
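// (Sketch of the expected workflow, with the exact commands depending on
// the local gdb: the user opens another terminal and attaches gdb to the
// port chosen below via "target remote"; port probing starts at the
// tracee's tid.)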
GdbConnection::Features features; // Don't advertise reverse_execution to gdb because a) it won't work and // b) some gdb versions will fail if the user doesn't turn off async // mode (and we don't want to require users to do that) features.reverse_execution = false; unique_ptr<GdbConnection> dbg = GdbConnection::await_client_connection( t->tid, GdbConnection::PROBE_PORT, t->tgid(), t->vm()->exe_image(), features); GdbServer(dbg, t).process_debugger_requests(); } string GdbServer::init_script() { return gdb_rr_macros(); } rr-4.1.0/src/GdbServer.h000066400000000000000000000216041265436462100147760ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_GDB_SERVER_H_ #define RR_GDB_SERVER_H_ #include <map> #include <memory> #include <string> #include "DiversionSession.h" #include "GdbConnection.h" #include "ReplaySession.h" #include "ReplayTimeline.h" #include "ScopedFd.h" #include "TraceFrame.h" class GdbServer { friend class GdbCommand; public: struct Target { Target() : pid(0), require_exec(false), event(0) {} // Target process to debug, or 0 to just debug the first process pid_t pid; // If true, wait for the target process to exec() before attaching debugger bool require_exec; // Wait until at least 'event' has elapsed before attaching TraceFrame::Time event; }; struct ConnectionFlags { // -1 to let GdbServer choose the port, a positive integer to select a // specific port to listen on. int dbg_port; // If non-null, then when the gdbserver is set up, we write its connection // parameters through this pipe. GdbServer::launch_gdb is passed the // other end of this pipe to exec gdb with the parameters. ScopedFd* debugger_params_write_pipe; ConnectionFlags() : dbg_port(-1), debugger_params_write_pipe(nullptr) {} }; /** * Create a gdbserver serving the replay of 'session'. */ GdbServer(std::shared_ptr<ReplaySession> session, const ReplaySession::Flags& flags, const Target& target) : target(target), final_event(-1), stop_reason(0), in_debuggee_end_state(false), stop_replaying_to_target(false), interrupt_pending(false), timeline(std::move(session), flags), emergency_debug_session(nullptr) {} /** * Actually run the server. Returns only when the debugger disconnects. */ void serve_replay(const ConnectionFlags& flags); /** * exec()'s gdb using parameters read from params_pipe_fd (and sent through * the pipe passed to serve_replay_with_debugger). */ static void launch_gdb(ScopedFd& params_pipe_fd, const std::string& gdb_command_file_path, const std::string& gdb_binary_file_path); /** * Start a debugging connection for |t| and return when there are no * more requests to process (usually because the debugger detaches). * * This helper doesn't attempt to determine whether blocking rr on a * debugger connection might be a bad idea. It will always open the debug * socket and block awaiting a connection. */ static void emergency_debug(Task* t); /** * A string containing the default gdbinit script that we load into gdb. */ static std::string init_script(); /** * Called from a signal handler (or other thread) during serve_replay, * this will cause the replay-to-target phase to be interrupted and * debugging started wherever the replay happens to be. */ void interrupt_replay_to_target() { stop_replaying_to_target = true; } /** * Return the register |which|, which may not have a defined value.
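 * A minimal usage sketch:
 *
 *   GdbRegisterValue v =
 *       GdbServer::get_reg(t->regs(), t->extra_regs(), DREG_RIP);
 *   if (v.defined) {
 *     // v.size bytes of v.value hold the register contents.
 *   }
 *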
*/ static GdbRegisterValue get_reg(const Registers& regs, const ExtraRegisters& extra_regs, GdbRegister which); private: GdbServer(std::unique_ptr& dbg, Task* t) : dbg(std::move(dbg)), debuggee_tguid(t->task_group()->tguid()), last_continue_tuid(t->tuid()), last_query_tuid(t->tuid()), final_event(-1), stop_reason(0), stop_replaying_to_target(false), interrupt_pending(false), emergency_debug_session(&t->session()) {} Session& current_session() { return timeline.is_running() ? timeline.current_session() : *emergency_debug_session; } /** * If |req| is a magic-write command, interpret it and return true. * Otherwise, do nothing and return false. */ bool maybe_process_magic_command(const GdbRequest& req); void dispatch_regs_request(const Registers& regs, const ExtraRegisters& extra_regs); enum ReportState { REPORT_NORMAL, REPORT_THREADS_DEAD }; /** * Process the single debugger request |req|, made by |dbg| targeting * |t|, inside the session |session|. * * Callers should implement any special semantics they want for * particular debugger requests before calling this helper, to do * generic processing. */ void dispatch_debugger_request(Session& session, const GdbRequest& req, ReportState state); bool at_target(); void activate_debugger(); void restart_session(const GdbRequest& req); GdbRequest process_debugger_requests(ReportState state = REPORT_NORMAL); enum ContinueOrStop { CONTINUE_DEBUGGING, STOP_DEBUGGING }; bool detach_or_restart(const GdbRequest& req, ContinueOrStop* s); ContinueOrStop handle_exited_state(GdbRequest& last_resume_request); ContinueOrStop debug_one_step(GdbRequest& last_resume_request); /** * If 'req' is a reverse-singlestep, try to obtain the resulting state * directly from ReplayTimeline's mark database. If that succeeds, * report the singlestep break status to gdb and process any get-registers * requests. Repeat until we get a request that isn't reverse-singlestep * or get-registers, returning that request in 'req'. * During reverse-next commands, gdb tends to issue a series of * reverse-singlestep/get-registers pairs, and this makes those much * more efficient by avoiding having to actually reverse-singlestep the * session. */ void try_lazy_reverse_singlesteps(GdbRequest& req); /** * Process debugger requests made in |diversion_session| until action needs * to be taken by the caller (a resume-execution request is received). * The received request is returned through |req|. * Returns true if diversion should continue, false if it should end. */ bool diverter_process_debugger_requests(DiversionSession& diversion_session, uint32_t& diversion_refcount, GdbRequest* req); /** * Create a new diversion session using |replay| session as the * template. The |replay| session isn't mutated. * * Execution begins in the new diversion session under the control of * |dbg| starting with initial thread target |task|. The diversion * session ends at the request of |dbg|, and |divert| returns the first * request made that wasn't handled by the diversion session. That * is, the first request that should be handled by |replay| upon * resuming execution in that session. */ GdbRequest divert(ReplaySession& replay); /** * If |break_status| indicates a stop that we should report to gdb, * report it. |req| is the resume request that generated the stop. */ void maybe_notify_stop(const GdbRequest& req, const BreakStatus& break_status); /** * Return the checkpoint stored as |checkpoint_id| or nullptr if there * isn't one. 
*/ ReplaySession::shr_ptr get_checkpoint(int checkpoint_id); /** * Delete the checkpoint stored as |checkpoint_id| if it exists, or do * nothing if it doesn't exist. */ void delete_checkpoint(int checkpoint_id); Target target; // dbg is initially null. Once the debugger connection is established, it // never changes. std::unique_ptr dbg; // When dbg is non-null, the TaskGroupUid of the task being debugged. Never // changes once the connection is established --- we don't currently // support switching gdb between debuggee processes. TaskGroupUid debuggee_tguid; // The TaskUid of the last continued task. TaskUid last_continue_tuid; // The TaskUid of the last queried task. TaskUid last_query_tuid; TraceFrame::Time final_event; // Stop reason for last notified stop. int stop_reason; bool in_debuggee_end_state; // True when the user has interrupted replaying to a target event. volatile bool stop_replaying_to_target; // True when a DREQ_INTERRUPT has been received but not handled, or when // we've restarted and want the first continue to be interrupted immediately. bool interrupt_pending; ReplayTimeline timeline; Session* emergency_debug_session; struct Checkpoint { enum Explicit { EXPLICIT, NOT_EXPLICIT }; Checkpoint(ReplayTimeline& timeline, TaskUid last_continue_tuid, Explicit e) : last_continue_tuid(last_continue_tuid), is_explicit(e) { if (e == EXPLICIT) { mark = timeline.add_explicit_checkpoint(); } else { mark = timeline.mark(); } } Checkpoint() : is_explicit(NOT_EXPLICIT) {} ReplayTimeline::Mark mark; TaskUid last_continue_tuid; Explicit is_explicit; }; // |debugger_restart_mark| is the point where we will restart from with // a no-op debugger "run" command. Checkpoint debugger_restart_checkpoint; // gdb checkpoints, indexed by ID std::map checkpoints; }; #endif /* RR_GDB_SERVER_H_ */ rr-4.1.0/src/HelpCommand.cc000066400000000000000000000015431265436462100154400ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "Command.h" #include "main.h" using namespace std; class HelpCommand : public Command { public: virtual int run(std::vector& args); protected: HelpCommand(const char* name, const char* help) : Command(name, help) {} static HelpCommand help1; static HelpCommand help2; static HelpCommand help3; }; HelpCommand HelpCommand::help1("help", " rr help [command]\n"); HelpCommand HelpCommand::help2("-h", nullptr); HelpCommand HelpCommand::help3("--help", nullptr); int HelpCommand::run(std::vector& args) { if (args.size() == 0) { print_usage(stdout); return 0; } Command* command = Command::command_for_name(args[0]); if (!command) { print_usage(stderr); return 1; } command->print_help(stdout); return 0; } rr-4.1.0/src/MagicSaveDataMonitor.cc000066400000000000000000000047631265436462100172610ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "MagicSaveDataMonitor.h" #include #include #include "log.h" #include "Session.h" #include "task.h" #include "util.h" static void dump_path_data(Task* t, TraceFrame::Time global_time, const char* tag, char* filename, size_t filename_size, const void* buf, size_t buf_len, remote_ptr addr) { format_dump_filename(t, global_time, tag, filename, filename_size); dump_binary_data(filename, tag, (const uint32_t*)buf, buf_len / 4, addr); } static void notify_save_data_error(Task* t, remote_ptr addr, const void* rec_buf, size_t rec_buf_len, const void* rep_buf, size_t rep_buf_len) { char rec_dump[PATH_MAX]; char rep_dump[PATH_MAX]; 
TraceFrame::Time global_time = t->current_trace_frame().time(); dump_path_data(t, global_time, "rec_save_data", rec_dump, sizeof(rec_dump), rec_buf, rec_buf_len, addr); dump_path_data(t, global_time, "rep_save_data", rep_dump, sizeof(rep_dump), rep_buf, rep_buf_len, addr); ASSERT(t, (rec_buf_len == rep_buf_len && !memcmp(rec_buf, rep_buf, rec_buf_len))) << "Divergence in contents of 'tracee-save buffer'. Recording executed\n" "\n" " write(" << RR_MAGIC_SAVE_DATA_FD << ", " << addr << ", " << rec_buf_len << ")\n" "\n" "and replay executed\n" "\n" " write(" << RR_MAGIC_SAVE_DATA_FD << ", " << addr << ", " << rep_buf_len << ")\n" "\n" "The contents of the tracee-save buffers have been dumped to disk.\n" "Compare them by using the following command\n" "\n" "$ diff -u " << rec_dump << " " << rep_dump << " >save-data-diverge.diff\n"; } void MagicSaveDataMonitor::did_write(Task* t, const std::vector& ranges) { for (auto& r : ranges) { if (t->session().is_recording()) { t->record_remote(r.data.cast(), r.length); } else if (t->session().is_replaying()) { auto bytes = t->read_mem(r.data.cast(), r.length); auto rec = t->trace_reader().read_raw_data(); if (rec.data != bytes) { notify_save_data_error(t, rec.addr, rec.data.data(), rec.data.size(), bytes.data(), bytes.size()); } } } } rr-4.1.0/src/MagicSaveDataMonitor.h000066400000000000000000000011161265436462100171100ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_MAGIC_SAVE_DATA_MONITOR_H_ #define RR_MAGIC_SAVE_DATA_MONITOR_H_ #include "FileMonitor.h" /** * A FileMonitor to track writes to RR_MAGIC_SAVE_DATA_FD. */ class MagicSaveDataMonitor : public FileMonitor { public: MagicSaveDataMonitor() {} /** * During recording, record the written data. * During replay, check that the written data matches what was recorded. */ virtual void did_write(Task* t, const std::vector& ranges); }; #endif /* RR_MAGIC_SAVE_DATA_MONITOR_H_ */ rr-4.1.0/src/MemoryRange.h000066400000000000000000000036251265436462100153430ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_MEMORY_RANGE_H_ #define RR_MEMORY_RANGE_H_ #include "remote_ptr.h" /** * Range of memory addresses that can be used as a std::map key. */ class MemoryRange { public: MemoryRange() {} MemoryRange(remote_ptr addr, size_t num_bytes) : start_(addr), end_(addr + num_bytes) { assert(start_ <= end_); } MemoryRange(remote_ptr addr, remote_ptr end) : start_(addr), end_(end) { assert(start_ <= end); } MemoryRange(const MemoryRange&) = default; MemoryRange& operator=(const MemoryRange&) = default; bool operator==(const MemoryRange& o) const { return start_ == o.start_ && end_ == o.end_; } bool operator<(const MemoryRange& o) const { return start_ != o.start_ ? start_ < o.start_ : end_ < o.end_; } /** * Return true iff |o| is an address range fully contained by * this. 
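 * For example, with A = [0x1000, 0x3000) and B = [0x1800, 0x2000),
 * A.contains(B) holds but B.contains(A) does not, and A.intersect(B)
 * equals B. The bound checks are non-strict, so a range contains
 * itself.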
*/ bool contains(const MemoryRange& o) const { return start_ <= o.start_ && o.end_ <= end_; } bool intersects(const MemoryRange& other) const { remote_ptr s = std::max(start_, other.start_); remote_ptr e = std::min(end_, other.end_); return s < e; } MemoryRange intersect(const MemoryRange& other) const { remote_ptr s = std::max(start_, other.start_); remote_ptr e = std::min(end_, other.end_); return MemoryRange(s, std::max(s, e)); } remote_ptr start() const { return start_; } remote_ptr end() const { return end_; } size_t size() const { return end_ - start_; } // XXX DO NOT USE void update_start(remote_ptr s) const { const_cast(this)->start_ = s; } private: remote_ptr start_; remote_ptr end_; }; inline std::ostream& operator<<(std::ostream& o, const MemoryRange& m) { o << m.start() << "-" << m.end(); return o; } #endif /* RR_MEMORY_RANGE_H_ */ rr-4.1.0/src/Monkeypatcher.cc000066400000000000000000001043701265436462100160640ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "Monkeypatcher" #include "Monkeypatcher.h" #include "AddressSpace.h" #include "AutoRemoteSyscalls.h" #include "elf.h" #include "kernel_abi.h" #include "kernel_metadata.h" #include "log.h" #include "ReplaySession.h" #include "ScopedFd.h" #include "task.h" using namespace rr; using namespace std; #include "AssemblyTemplates.generated" static void write_and_record_bytes(Task* t, remote_ptr child_addr, size_t size, const void* buf) { t->write_bytes_helper(child_addr, size, buf); t->record_local(child_addr, size, buf); } template static void write_and_record_bytes(Task* t, remote_ptr child_addr, const uint8_t(&buf)[N]) { write_and_record_bytes(t, child_addr, N, buf); } template static void write_and_record_mem(Task* t, remote_ptr child_addr, const T* val, int count) { t->write_bytes_helper(child_addr, sizeof(*val) * count, static_cast(val)); t->record_local(child_addr, sizeof(T) * count, val); } /** * RecordSession sets up an LD_PRELOAD environment variable with an entry * SYSCALLBUF_LIB_FILENAME_PADDED which is big enough to hold either the * 32-bit or 64-bit preload library file names. Immediately after exec we * enter this function, which patches the environment variable value with * the correct library name for the task's architecture. * * It's possible for this to fail if a tracee alters the LD_PRELOAD value * and then does an exec. That's just too bad. If we ever have to handle that, * we should modify the environment passed to the exec call. */ template static void setup_preload_library_path(Task* t) { static_assert(sizeof(SYSCALLBUF_LIB_FILENAME_PADDED) == sizeof(SYSCALLBUF_LIB_FILENAME_32), "filename length mismatch"); const char* lib_name = sizeof(typename Arch::unsigned_word) < sizeof(uintptr_t) ? 
SYSCALLBUF_LIB_FILENAME_32 : SYSCALLBUF_LIB_FILENAME_PADDED; auto p = t->regs().sp().cast(); auto argc = t->read_mem(p); p += 1 + argc + 1; // skip argc, argc parameters, and trailing NULL while (true) { auto envp = t->read_mem(p); if (!envp) { ASSERT(t, false) << "LD_PRELOAD not found"; return; } string env = t->read_c_str(envp); if (env.find("LD_PRELOAD=") != 0) { ++p; continue; } size_t lib_pos = env.find(SYSCALLBUF_LIB_FILENAME_BASE); if (lib_pos == string::npos) { ASSERT(t, false) << SYSCALLBUF_LIB_FILENAME_BASE " not found in LD_PRELOAD"; return; } size_t next_colon = env.find(':', lib_pos); if (next_colon != string::npos) { while (env[next_colon + 1] == ':' || env[next_colon + 1] == 0) { ++next_colon; } if (next_colon < lib_pos + sizeof(SYSCALLBUF_LIB_FILENAME_PADDED) - 1) { ASSERT(t, false) << "Insufficient space for " << lib_name << " in LD_PRELOAD before next ':'"; return; } } if (env.length() < lib_pos + sizeof(SYSCALLBUF_LIB_FILENAME_PADDED) - 1) { ASSERT(t, false) << "Insufficient space for " << lib_name << " in LD_PRELOAD before end of string"; return; } remote_ptr dest = envp + lib_pos; write_and_record_mem(t, dest.cast(), lib_name, sizeof(SYSCALLBUF_LIB_FILENAME_PADDED) - 1); return; } } void Monkeypatcher::init_dynamic_syscall_patching( Task* t, int syscall_patch_hook_count, remote_ptr syscall_patch_hooks, remote_ptr stub_buffer, remote_ptr stub_buffer_end) { if (syscall_patch_hook_count) { syscall_hooks = t->read_mem(syscall_patch_hooks, syscall_patch_hook_count); } this->stub_buffer = stub_buffer; this->stub_buffer_end = stub_buffer_end; } template static bool patch_syscall_with_hook_arch(Monkeypatcher& patcher, Task* t, const syscall_patch_hook& hook); remote_ptr Monkeypatcher::allocate_stub(Task* t, size_t bytes) { if (!stub_buffer) { return nullptr; } ASSERT(t, (stub_buffer_end - stub_buffer) % bytes == 0) << "Stub size mismatch"; if (stub_buffer + stub_buffer_allocated + bytes > stub_buffer_end) { return nullptr; } auto result = stub_buffer.cast() + stub_buffer_allocated; stub_buffer_allocated += bytes; return result; } template static void substitute(uint8_t* buffer, uint64_t return_addr, uint32_t trampoline_relative_addr); template <> void substitute(uint8_t* buffer, uint64_t return_addr, uint32_t trampoline_relative_addr) { X86SyscallStubMonkeypatch::substitute(buffer, (uint32_t)return_addr, trampoline_relative_addr); } template <> void substitute(uint8_t* buffer, uint64_t return_addr, uint32_t trampoline_relative_addr) { X64SyscallStubMonkeypatch::substitute(buffer, (uint32_t)return_addr, (uint32_t)(return_addr >> 32), trampoline_relative_addr); } template static void substitute_extended_jump(uint8_t* buffer, uint64_t from_end, uint64_t to_start); template <> void substitute_extended_jump(uint8_t* buffer, uint64_t from_end, uint64_t to_start) { int64_t offset = to_start - from_end; // An offset that appears to be > 2GB is OK here, since EIP will just // wrap around. X86SyscallStubExtendedJump::substitute(buffer, (int32_t)offset); } template <> void substitute_extended_jump(uint8_t* buffer, uint64_t from_end, uint64_t to_start) { X64SyscallStubExtendedJump::substitute(buffer, to_start); } /** * Allocate an extended jump in an extended jump page and return its address. * The resulting address must be within 2G of from_end, and the instruction * there must jump to to_start. 
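 *
 * The 2G limit exists because the patch site reaches this code with a
 * rel32 jump: the displacement is computed as (page_jump_start - from_end)
 * below and must survive a round-trip through int32_t. Worked example with
 * made-up addresses: from_end = 0x7f0000001000 and a candidate slot at
 * 0x7f0000101000 give a displacement of 0x100000, which fits; a slot 4GB
 * away fails the (int32_t)offset == offset check and is rejected.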
*/ template static remote_ptr allocate_extended_jump( Task* t, vector& pages, remote_ptr from_end, remote_ptr to_start) { Monkeypatcher::ExtendedJumpPage* page = nullptr; for (auto& p : pages) { remote_ptr page_jump_start = p.addr + p.allocated; int64_t offset = page_jump_start - from_end; if ((int32_t)offset == offset && p.allocated + ExtendedJumpPatch::size <= page_size()) { page = &p; break; } } if (!page) { // Find free space after the patch site. auto maps = t->vm()->maps_starting_at(t->vm()->mapping_of(from_end).map.start()); auto current = maps.begin(); // We're looking for a gap of three pages --- one page to allocate and // a page on each side as a guard page. uint32_t required_space = 3 * page_size(); while (current != maps.end()) { auto next = current; ++next; if (next == maps.end()) { if (current->map.end() + required_space >= current->map.end()) { break; } } else { if (current->map.end() + required_space <= next->map.start()) { break; } } current = next; } if (current == maps.end()) { LOG(debug) << "Can't find space for our jump page"; return nullptr; } remote_ptr addr = (current->map.end() + page_size()).cast(); int64_t offset = addr - from_end; if ((int32_t)offset != offset) { LOG(debug) << "Can't find space close enough for the jump"; return nullptr; } { AutoRemoteSyscalls remote(t); int prot = PROT_READ | PROT_EXEC; int flags = MAP_ANONYMOUS | MAP_FIXED | MAP_PRIVATE; remote.infallible_mmap_syscall(addr, page_size(), prot, flags, -1, 0); KernelMapping recorded(addr, addr + page_size(), string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE, prot, flags); t->vm()->map(addr, page_size(), prot, flags, 0, string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE, &recorded); t->trace_writer().write_mapped_region(recorded, recorded.fake_stat(), TraceWriter::PATCH_MAPPING); } pages.push_back(Monkeypatcher::ExtendedJumpPage(addr)); page = &pages.back(); } uint8_t jump_patch[ExtendedJumpPatch::size]; remote_ptr jump_addr = page->addr + page->allocated; substitute_extended_jump( jump_patch, jump_addr.as_int() + sizeof(jump_patch), to_start.as_int()); write_and_record_bytes(t, jump_addr, jump_patch); page->allocated += sizeof(jump_patch); return jump_addr; } /** * Some functions make system calls while storing local variables in memory * below the stack pointer. We need to decrement the stack pointer by * some "safety zone" amount to get clear of those variables before we make * a call instruction. So, we allocate a stub per patched callsite, and jump * from the callsite to the stub. The stub decrements the stack pointer, * calls the appropriate syscall hook function, reincrements the stack pointer, * and jumps back to immediately after the patched callsite. * * It's important that gdb stack traces work while a thread is stopped in the * syscallbuf code. To ensure that the above manipulations don't foil gdb's * stack walking code, we add CFI data to all the stubs. To ease that, the * stubs are written in assembly and linked into the preload library. * * On x86-64 with ASLR, we need to be able to patch a call to a stub from * sites more than 2^31 bytes away. We only have space for a 5-byte jump * instruction. So, we allocate "extender pages" --- pages of memory within * 2GB of the patch site, within which we allocate instructions that can jump * anywhere in memory. We don't really need this on x86, but we do it there * too for consistency. 
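 *
 * Schematically, the patched control flow looks like this (a rough sketch
 * of the mechanism described above, not the literal generated code; the
 * real bytes come from the generated assembly templates):
 *
 *   patch site:     jmp rel32              ; 5 bytes, must reach +/-2GB
 *   extended jump:  jmp stub               ; can reach anywhere in memory
 *   stub:           sub $SAFETY_ZONE, %rsp ; step clear of locals below sp
 *                   call syscall_hook      ; the trampoline call
 *                   add $SAFETY_ZONE, %rsp
 *                   jmp back               ; just after the patched bytes
 *
 * SAFETY_ZONE here is a stand-in name for the safety-zone adjustment, not
 * an identifier from the actual stubs.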
*/ template static bool patch_syscall_with_hook_x86ish(Monkeypatcher& patcher, Task* t, const syscall_patch_hook& hook) { uint8_t stub_patch[StubPatch::size]; auto stub_patch_start = patcher.allocate_stub(t, sizeof(stub_patch)); if (!stub_patch_start) { LOG(debug) << "syscall can't be patched due to stub allocation failure"; return false; } auto stub_patch_after_trampoline_call = stub_patch_start + trampoline_call_end; uint8_t jump_patch[JumpPatch::size]; // We're patching in a relative jump, so we need to compute the offset from // the end of the jump to our actual destination. auto jump_patch_start = t->regs().ip().to_data_ptr(); auto jump_patch_end = jump_patch_start + sizeof(jump_patch); remote_ptr extended_jump_start = allocate_extended_jump( t, patcher.extended_jump_pages, jump_patch_end, stub_patch_start); if (extended_jump_start.is_null()) { return false; } intptr_t jump_offset = extended_jump_start - jump_patch_end; int32_t jump_offset32 = (int32_t)jump_offset; ASSERT(t, jump_offset32 == jump_offset) << "allocate_extended_jump didn't work"; intptr_t trampoline_call_offset = hook.hook_address - stub_patch_after_trampoline_call.as_int(); int32_t trampoline_call_offset32 = (int32_t)trampoline_call_offset; ASSERT(t, trampoline_call_offset32 == trampoline_call_offset) << "How did the stub area get far away from the hooks?"; JumpPatch::substitute(jump_patch, jump_offset32); write_and_record_bytes(t, jump_patch_start, jump_patch); // pad with NOPs to the next instruction static const uint8_t NOP = 0x90; assert(syscall_instruction_length(x86_64) == syscall_instruction_length(x86)); uint8_t nops[syscall_instruction_length(x86_64) + hook.next_instruction_length - sizeof(jump_patch)]; memset(nops, NOP, sizeof(nops)); write_and_record_mem(t, jump_patch_start + sizeof(jump_patch), nops, sizeof(nops)); // Now write out the stub substitute(stub_patch, jump_patch_end.as_int(), trampoline_call_offset32); write_and_record_bytes(t, stub_patch_start, stub_patch); return true; } template <> bool patch_syscall_with_hook_arch(Monkeypatcher& patcher, Task* t, const syscall_patch_hook& hook) { return patch_syscall_with_hook_x86ish(patcher, t, hook); } template <> bool patch_syscall_with_hook_arch(Monkeypatcher& patcher, Task* t, const syscall_patch_hook& hook) { return patch_syscall_with_hook_x86ish(patcher, t, hook); } static bool patch_syscall_with_hook(Monkeypatcher& patcher, Task* t, const syscall_patch_hook& hook) { RR_ARCH_FUNCTION(patch_syscall_with_hook_arch, t->arch(), patcher, t, hook); } static void operator<<(ostream& stream, const vector& bytes) { for (uint32_t i = 0; i < bytes.size(); ++i) { if (i > 0) { stream << ' '; } stream << HEX(bytes[i]); } } bool Monkeypatcher::try_patch_syscall(Task* t) { if (syscall_hooks.empty()) { // Syscall hooks not set up yet. Don't spew warnings, and don't // fill tried_to_patch_syscall_addresses with addresses that we might be // able to patch later. return false; } if (t->is_in_traced_syscall()) { // Never try to patch the traced-syscall in our preload library! return false; } Registers r = t->regs(); if (tried_to_patch_syscall_addresses.count(r.ip())) { return false; } // We could examine the current syscall number and if it's not one that // we support syscall buffering for, refuse to patch the syscall instruction. // This would, on the face of it, reduce overhead since patching the // instruction just means a useless trip through the syscall buffering logic. 
// However, it actually wouldn't help much since we'd still have to do a switch // on the syscall number in this function instead, and due to context // switching costs any overhead saved would be insignificant. // Also, implementing that would require keeping a buffered-syscalls // list in sync with the preload code, which is unnecessary complexity. tried_to_patch_syscall_addresses.insert(r.ip()); syscall_patch_hook dummy; auto next_instruction = t->read_mem(r.ip().to_data_ptr<uint8_t>(), sizeof(dummy.next_instruction_bytes)); intptr_t syscallno = r.original_syscallno(); for (auto& hook : syscall_hooks) { if (memcmp(next_instruction.data(), hook.next_instruction_bytes, hook.next_instruction_length) == 0) { // Get out of executing the current syscall before we patch it. t->exit_syscall_and_prepare_restart(); patch_syscall_with_hook(*this, t, hook); LOG(debug) << "Patched syscall at " << r.ip() << " syscall " << syscall_name(syscallno, t->arch()) << " tid " << t->tid << " bytes " << next_instruction; // Return to caller, which resumes normal execution. return true; } } LOG(debug) << "Failed to patch syscall at " << r.ip() << " syscall " << syscall_name(syscallno, t->arch()) << " tid " << t->tid << " bytes " << next_instruction; return false; } class SymbolTable { public: bool is_name(size_t i, const char* name) const { size_t offset = symbols[i].name_index; return offset < strtab.size() && strcmp(&strtab[offset], name) == 0; } uintptr_t file_offset(size_t i) const { return symbols[i].file_offset; } size_t size() const { return symbols.size(); } struct Symbol { Symbol(uintptr_t file_offset, size_t name_index) : file_offset(file_offset), name_index(name_index) {} Symbol() {} uintptr_t file_offset; size_t name_index; }; vector<Symbol> symbols; vector<char> strtab; }; class ElfReader { public: virtual ~ElfReader() {} virtual bool read(size_t offset, size_t size, void* buf) = 0; template <typename T> bool read(size_t offset, T& result) { return read(offset, sizeof(result), &result); } template <typename T> vector<T> read(size_t offset, size_t count) { vector<T> result; result.resize(count); if (!read(offset, sizeof(T) * count, result.data())) { result.clear(); } return result; } template <typename Arch> SymbolTable read_symbols_arch(const char* symtab, const char* strtab); SymbolTable read_symbols(SupportedArch arch, const char* symtab, const char* strtab); }; template <typename Arch> SymbolTable ElfReader::read_symbols_arch(const char* symtab, const char* strtab) { SymbolTable result; typename Arch::ElfEhdr elfheader; if (!read(0, elfheader) || memcmp(&elfheader, ELFMAG, SELFMAG) != 0 || elfheader.e_ident[EI_CLASS] != Arch::elfclass || elfheader.e_ident[EI_DATA] != Arch::elfendian || elfheader.e_machine != Arch::elfmachine || elfheader.e_shentsize != sizeof(typename Arch::ElfShdr) || elfheader.e_shstrndx >= elfheader.e_shnum) { LOG(debug) << "Invalid ELF file: invalid header"; return result; } auto sections = read<typename Arch::ElfShdr>(elfheader.e_shoff, elfheader.e_shnum); if (sections.empty()) { LOG(debug) << "Invalid ELF file: no sections"; return result; } auto& section_names_section = sections[elfheader.e_shstrndx]; auto section_names = read<char>(section_names_section.sh_offset, section_names_section.sh_size); if (section_names.empty()) { LOG(debug) << "Invalid ELF file: can't read section names"; return result; } section_names[section_names.size() - 1] = 0; typename Arch::ElfShdr* symbols = nullptr; typename Arch::ElfShdr* strings = nullptr; for (size_t i = 0; i < elfheader.e_shnum; ++i) { auto& s = sections[i]; if (s.sh_name >= section_names.size()) { LOG(debug) << "Invalid ELF file: invalid name offset for 
section " << i; return result; } const char* name = section_names.data() + s.sh_name; if (strcmp(name, symtab) == 0) { if (symbols) { LOG(debug) << "Invalid ELF file: duplicate symbol section " << symtab; return result; } symbols = &s; } if (strcmp(name, strtab) == 0) { if (strings) { LOG(debug) << "Invalid ELF file: duplicate string section " << strtab; return result; } strings = &s; } } if (!symbols) { LOG(debug) << "Invalid ELF file: missing symbol section " << symtab; return result; } if (!strings) { LOG(debug) << "Invalid ELF file: missing string section " << strtab; return result; } if (symbols->sh_entsize != sizeof(typename Arch::ElfSym)) { LOG(debug) << "Invalid ELF file: incorrect symbol size " << symbols->sh_entsize; return result; } if (symbols->sh_size % symbols->sh_entsize) { LOG(debug) << "Invalid ELF file: incorrect symbol section size " << symbols->sh_size; return result; } auto symbol_list = read( symbols->sh_offset, symbols->sh_size / symbols->sh_entsize); if (symbol_list.empty()) { LOG(debug) << "Invalid ELF file: can't read symbols " << symtab; return result; } result.strtab = read(strings->sh_offset, strings->sh_size); if (result.strtab.empty()) { LOG(debug) << "Invalid ELF file: can't read strings " << strtab; } result.symbols.resize(symbol_list.size()); for (size_t i = 0; i < symbol_list.size(); ++i) { auto& s = symbol_list[i]; if (s.st_shndx >= sections.size()) { continue; } auto& section = sections[s.st_shndx]; result.symbols[i] = SymbolTable::Symbol( s.st_value - section.sh_addr + section.sh_offset, s.st_name); } return result; } SymbolTable ElfReader::read_symbols(SupportedArch arch, const char* symtab, const char* strtab) { RR_ARCH_FUNCTION(read_symbols_arch, arch, symtab, strtab); } class VdsoReader : public ElfReader { public: VdsoReader(Task* t) : t(t) {} virtual bool read(size_t offset, size_t size, void* buf) { bool ok = true; t->read_bytes_helper(t->vm()->vdso().start() + offset, size, buf, &ok); return ok; } Task* t; }; static SymbolTable read_vdso_symbols(Task* t) { return VdsoReader(t).read_symbols(t->arch(), ".dynsym", ".dynstr"); } /** * Return true iff |addr| points to a known |__kernel_vsyscall()| * implementation. */ static bool is_kernel_vsyscall(Task* t, remote_ptr addr) { uint8_t impl[X86SysenterVsyscallImplementation::size]; t->read_bytes(addr, impl); return X86SysenterVsyscallImplementation::match(impl); } /** * Return the address of a recognized |__kernel_vsyscall()| * implementation in |t|'s address space. */ static remote_ptr locate_and_verify_kernel_vsyscall( Task* t, const SymbolTable& syms) { remote_ptr kernel_vsyscall = nullptr; // It is unlikely but possible that multiple, versioned __kernel_vsyscall // symbols will exist. But we can't rely on setting |kernel_vsyscall| to // catch that case, because only one of the versioned symbols will // actually match what we expect to see, and the matching one might be // the last one. Therefore, we have this separate flag to alert us to // this possibility. bool seen_kernel_vsyscall = false; for (size_t i = 0; i < syms.size(); ++i) { if (syms.is_name(i, "__kernel_vsyscall")) { remote_ptr candidate = syms.file_offset(i); // The symbol values can be absolute or relative addresses. // The first part of the assertion is for absolute // addresses, and the second part is for relative. 
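      // For instance (made-up values): an absolute symbol value of
      // 0xffffe420 has high bits 0xffffe000 and a relative value of 0x420
      // has high bits 0, so both pass the check below; anything else is
      // skipped by the |continue| as a bogus duplicate.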
if ((candidate.as_int() & ~uintptr_t(0xfff)) != 0xffffe000 && (candidate.as_int() & ~uintptr_t(0xfff)) != 0) { // With 4.2.8-300.fc23.x86_64, execve_loop_32 seems to once in a while // see a VDSO with a crazy file offset in it which is a duplicate // __kernel_vsyscall. Bizzarro. Ignore it. continue; } ASSERT(t, !seen_kernel_vsyscall); seen_kernel_vsyscall = true; // The ELF information in the VDSO assumes that the VDSO // is always loaded at a particular address. The kernel, // however, subjects the VDSO to ASLR, which means that // we have to adjust the offsets properly. auto vdso_start = t->vm()->vdso().start(); uintptr_t candidate_offset = candidate.as_int() & uintptr_t(0xfff); candidate = vdso_start + candidate_offset; if (is_kernel_vsyscall(t, candidate)) { kernel_vsyscall = candidate; } } } return kernel_vsyscall; } // VDSOs are filled with overhead critical functions related to getting the // time and current CPU. We need to ensure that these syscalls get redirected // into actual trap-into-the-kernel syscalls so rr can intercept them. template static void patch_after_exec_arch(Task* t, Monkeypatcher& patcher); template static void patch_at_preload_init_arch(Task* t, Monkeypatcher& patcher); struct named_syscall { const char* name; int syscall_number; }; // Monkeypatch x86-32 vdso syscalls immediately after exec. The vdso syscalls // will cause replay to fail if called by the dynamic loader or some library's // static constructors, so we can't wait for our preload library to be // initialized. Fortunately we're just replacing the vdso code with real // syscalls so there is no dependency on the preload library at all. template <> void patch_after_exec_arch(Task* t, Monkeypatcher& patcher) { setup_preload_library_path(t); auto syms = read_vdso_symbols(t); patcher.x86_sysenter_vsyscall = locate_and_verify_kernel_vsyscall(t, syms); if (!patcher.x86_sysenter_vsyscall) { FATAL() << "Failed to monkeypatch vdso: your __kernel_vsyscall() wasn't " "recognized.\n" " Syscall buffering is now effectively disabled. If you're " "OK with\n" " running rr without syscallbuf, then run the recorder " "passing the\n" " --no-syscall-buffer arg.\n" " If you're *not* OK with that, file an issue."; } // Patch __kernel_vsyscall to use int 80 instead of sysenter. // During replay we may remap the VDSO to a new address, and the sysenter // instruction would return to the old address, so we must make sure sysenter // is never used. 
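  // Conceptually the template below swaps the sysenter-based entry
  // sequence for an int $0x80-based one; the precise byte sequence comes
  // from the generated X86SysenterVsyscallUseInt80 assembly template
  // rather than being hand-assembled here.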
uint8_t patch[X86SysenterVsyscallUseInt80::size]; X86SysenterVsyscallUseInt80::substitute(patch); write_and_record_bytes(t, patcher.x86_sysenter_vsyscall, patch); LOG(debug) << "monkeypatched __kernel_vsyscall to use int $80"; auto vdso_start = t->vm()->vdso().start(); static const named_syscall syscalls_to_monkeypatch[] = { #define S(n) \ { "__vdso_" #n, X86Arch::n } S(clock_gettime), S(gettimeofday), S(time), #undef S }; for (size_t i = 0; i < syms.size(); ++i) { for (size_t j = 0; j < array_length(syscalls_to_monkeypatch); ++j) { if (syms.is_name(i, syscalls_to_monkeypatch[j].name)) { static const uintptr_t vdso_max_size = 0xffffLL; uintptr_t sym_address = syms.file_offset(i); ASSERT(t, (sym_address & ~vdso_max_size) == 0); uintptr_t absolute_address = vdso_start.as_int() + sym_address; uint8_t patch[X86VsyscallMonkeypatch::size]; uint32_t syscall_number = syscalls_to_monkeypatch[j].syscall_number; X86VsyscallMonkeypatch::substitute(patch, syscall_number); write_and_record_bytes(t, absolute_address, patch); LOG(debug) << "monkeypatched " << syscalls_to_monkeypatch[j].name << " to syscall " << syscalls_to_monkeypatch[j].syscall_number; } } } } // Monkeypatch x86 vsyscall hook only after the preload library // has initialized. The vsyscall hook expects to be able to use the syscallbuf. // Before the preload library has initialized, the regular vsyscall code // will trigger ptrace traps and be handled correctly by rr. template <> void patch_at_preload_init_arch(Task* t, Monkeypatcher& patcher) { auto params = t->read_mem( remote_ptr >(t->regs().arg1())); if (!params.syscallbuf_enabled) { return; } auto kernel_vsyscall = patcher.x86_sysenter_vsyscall; // Luckily, linux is happy for us to scribble directly over // the vdso mapping's bytes without mprotecting the region, so // we don't need to prepare remote syscalls here. remote_ptr syscall_hook_trampoline = params.syscall_hook_trampoline; uint8_t patch[X86SysenterVsyscallSyscallHook::size]; // We're patching in a relative jump, so we need to compute the offset from // the end of the jump to our actual destination. X86SysenterVsyscallSyscallHook::substitute( patch, syscall_hook_trampoline.as_int() - (kernel_vsyscall + sizeof(patch)).as_int()); write_and_record_bytes(t, kernel_vsyscall, patch); LOG(debug) << "monkeypatched __kernel_vsyscall to jump to " << HEX(syscall_hook_trampoline.as_int()); patcher.init_dynamic_syscall_patching( t, params.syscall_patch_hook_count, params.syscall_patch_hooks, params.syscall_hook_stub_buffer, params.syscall_hook_stub_buffer_end); } // Monkeypatch x86-64 vdso syscalls immediately after exec. The vdso syscalls // will cause replay to fail if called by the dynamic loader or some library's // static constructors, so we can't wait for our preload library to be // initialized. Fortunately we're just replacing the vdso code with real // syscalls so there is no dependency on the preload library at all. template <> void patch_after_exec_arch(Task* t, Monkeypatcher& patcher) { setup_preload_library_path(t); auto vdso_start = t->vm()->vdso().start(); auto syms = read_vdso_symbols(t); static const named_syscall syscalls_to_monkeypatch[] = { #define S(n) \ { "__vdso_" #n, X64Arch::n } S(clock_gettime), S(gettimeofday), S(time), // getcpu isn't supported by rr, so any changes to this monkeypatching // scheme for efficiency's sake will have to ensure that getcpu gets // converted to an actual syscall so rr will complain appropriately. 
S(getcpu), #undef S }; for (size_t i = 0; i < syms.size(); ++i) { for (size_t j = 0; j < array_length(syscalls_to_monkeypatch); ++j) { if (syms.is_name(i, syscalls_to_monkeypatch[j].name)) { // Absolutely-addressed symbols in the VDSO claim to start here. static const uint64_t vdso_static_base = 0xffffffffff700000LL; static const uintptr_t vdso_max_size = 0xffffLL; uintptr_t sym_address = syms.file_offset(i); // The symbol values can be absolute or relative addresses. // The first part of the assertion is for absolute // addresses, and the second part is for relative. ASSERT(t, (sym_address & ~vdso_max_size) == vdso_static_base || (sym_address & ~vdso_max_size) == 0); uintptr_t sym_offset = sym_address & vdso_max_size; uintptr_t absolute_address = vdso_start.as_int() + sym_offset; uint8_t patch[X64VsyscallMonkeypatch::size]; uint32_t syscall_number = syscalls_to_monkeypatch[j].syscall_number; X64VsyscallMonkeypatch::substitute(patch, syscall_number); write_and_record_bytes(t, absolute_address, patch); LOG(debug) << "monkeypatched " << syscalls_to_monkeypatch[j].name << " to syscall " << syscalls_to_monkeypatch[j].syscall_number; } } } } template <> void patch_at_preload_init_arch(Task* t, Monkeypatcher& patcher) { auto params = t->read_mem( remote_ptr >(t->regs().arg1())); if (!params.syscallbuf_enabled) { return; } patcher.init_dynamic_syscall_patching( t, params.syscall_patch_hook_count, params.syscall_patch_hooks, params.syscall_hook_stub_buffer, params.syscall_hook_stub_buffer_end); } void Monkeypatcher::patch_after_exec(Task* t) { ASSERT(t, 1 == t->vm()->task_set().size()) << "Can't have multiple threads immediately after exec!"; RR_ARCH_FUNCTION(patch_after_exec_arch, t->arch(), t, *this); } void Monkeypatcher::patch_at_preload_init(Task* t) { ASSERT(t, 1 == t->vm()->task_set().size()) << "TODO: monkeypatch multithreaded process"; // NB: the tracee can't be interrupted with a signal while // we're processing the rrcall, because it's masked off all // signals. RR_ARCH_FUNCTION(patch_at_preload_init_arch, t->arch(), t, *this); } class FileReader : public ElfReader { public: FileReader(ScopedFd& fd) : fd(fd) {} virtual bool read(size_t offset, size_t size, void* buf) { return pread(fd.get(), buf, size, offset) == ssize_t(size); } ScopedFd& fd; }; static void set_and_record_bytes(Task* t, uint64_t file_offset, const void* bytes, size_t size, remote_ptr map_start, size_t map_size, size_t map_offset_pages) { uint64_t map_offset = uint64_t(map_offset_pages) * page_size(); if (file_offset < map_offset || file_offset + size > map_offset + map_size) { // The value(s) to be set are outside the mapped range. This happens // because code and data can be mapped in separate, partial mmaps in which // case some symbols will be outside the mapped range. return; } remote_ptr addr = map_start + uintptr_t(file_offset - map_offset); bool ok = true; t->write_bytes_helper(addr, size, bytes, &ok); // Writing can fail when the value appears to be in the mapped range, but it // actually is beyond the file length. 
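  // (Worked example with made-up numbers: map_offset_pages == 2 gives
  // map_offset == 0x2000, so with map_start == 0x7f0000000000 and
  // file_offset == 0x2040 the write above landed at 0x7f0000000040.)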
if (ok) { t->record_local(addr, size, bytes); } } void Monkeypatcher::patch_after_mmap(Task* t, remote_ptr<void> start, size_t size, size_t offset_pages, int child_fd) { const auto& map = t->vm()->mapping_of(start); if (map.map.fsname().find("libpthread") != string::npos && (t->arch() == x86 || t->arch() == x86_64)) { ScopedFd open_fd = t->open_fd(child_fd, O_RDONLY); ASSERT(t, open_fd.is_open()) << "Failed to open child fd " << child_fd; auto syms = FileReader(open_fd).read_symbols(t->arch(), ".symtab", ".strtab"); for (size_t i = 0; i < syms.size(); ++i) { if (syms.is_name(i, "__elision_aconf")) { static const int zero = 0; // Setting __elision_aconf.retry_try_xbegin to zero means that // pthread rwlocks don't try to use elision at all. See ELIDE_LOCK // in glibc's elide.h. set_and_record_bytes(t, syms.file_offset(i) + 8, &zero, sizeof(zero), start, size, offset_pages); } if (syms.is_name(i, "elision_init")) { // Make elision_init return without doing anything. This means // the __elision_available and __pthread_force_elision flags will // remain zero, disabling elision for mutexes. See glibc's // elision-conf.c. static const uint8_t ret = 0xC3; set_and_record_bytes(t, syms.file_offset(i), &ret, sizeof(ret), start, size, offset_pages); } } } } rr-4.1.0/src/Monkeypatcher.h000066400000000000000000000071061265436462100157250ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_MONKEYPATCHER_H_ #define RR_MONKEYPATCHER_H_ #include <unordered_set> #include <vector> #include "preload/preload_interface.h" #include "remote_ptr.h" #include "remote_code_ptr.h" class ScopedFd; class Task; /** * A class encapsulating patching state. There is one instance of this * class per tracee address space. Currently this class performs the following * tasks: * * 1) Patch the VDSO's user-space-only implementation of certain system calls * (e.g. gettimeofday) to do a proper kernel system call instead, so rr can * trap and record it (x86-64 only). * * 2) Patch the VDSO __kernel_vsyscall fast-system-call stub to redirect to * our syscall hook in the preload library (x86 only). * * 3) Patch syscall instructions whose following instructions match a known * pattern to call the syscall hook. * * Monkeypatcher only runs during recording, never replay. */ class Monkeypatcher { public: Monkeypatcher() : stub_buffer_allocated(0) {} Monkeypatcher(const Monkeypatcher& o) = default; /** * Apply any necessary patching immediately after exec. * In this hook we patch everything that doesn't depend on the preload * library being loaded. */ void patch_after_exec(Task* t); /** * During librrpreload initialization, apply patches that require the * preload library to be initialized. */ void patch_at_preload_init(Task* t); /** * Try to patch the syscall instruction that |t| just entered. If this * returns false, patching failed and the syscall should be processed * as normal. If this returns true, patching succeeded and the syscall * was aborted; ip() has been reset to the start of the patched syscall, * and execution should resume normally to execute the patched code. * Zero or more mapping operations are also recorded to the trace and must * be replayed. */ bool try_patch_syscall(Task* t); void init_dynamic_syscall_patching( Task* t, int syscall_patch_hook_count, remote_ptr<struct syscall_patch_hook> syscall_patch_hooks, remote_ptr<void> stub_buffer, remote_ptr<void> stub_buffer_end); /** * Try to allocate a stub from the syscall patching stub buffer. Returns null * if there's no buffer or we've run out of free stubs. 
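 *
 * Allocation is a simple bump pointer over [stub_buffer, stub_buffer_end):
 * each successful call hands out the next |bytes|-sized slot and advances
 * stub_buffer_allocated. Nothing is ever freed, so once the buffer fills
 * up, patching is skipped for any remaining call sites.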
*/ remote_ptr allocate_stub(Task* t, size_t bytes); /** * Apply any necessary patching immediately after an mmap. We use this to * patch libpthread.so. */ void patch_after_mmap(Task* t, remote_ptr start, size_t size, size_t offset_pages, int child_fd); remote_ptr x86_sysenter_vsyscall; /** * The list of pages we've allocated to hold our extended jumps. */ struct ExtendedJumpPage { ExtendedJumpPage(remote_ptr addr) : addr(addr), allocated(0) {} remote_ptr addr; size_t allocated; }; std::vector extended_jump_pages; private: /** * The list of supported syscall patches obtained from the preload * library. Each one matches a specific byte signature for the instruction(s) * after a syscall instruction. */ std::vector syscall_hooks; /** * The addresses of the instructions following syscalls that we've tried * (or are currently trying) to patch. */ std::unordered_set tried_to_patch_syscall_addresses; /** * Writable executable memory where we can generate stubs. */ remote_ptr stub_buffer; remote_ptr stub_buffer_end; size_t stub_buffer_allocated; }; #endif /* RR_MONKEYPATCHER_H_ */ rr-4.1.0/src/PerfCounters.cc000066400000000000000000000170121265436462100156660ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "PerfCounters.h" #include #include #include #include #include #include #include #include #include #include #include #include #include "kernel_metadata.h" #include "log.h" #include "util.h" using namespace std; static bool attributes_initialized; static struct perf_event_attr ticks_attr; static struct perf_event_attr page_faults_attr; static struct perf_event_attr hw_interrupts_attr; static struct perf_event_attr instructions_retired_attr; /* * Find out the cpu model using the cpuid instruction. * Full list of CPUIDs at http://sandpile.org/x86/cpuid.htm * Another list at * http://software.intel.com/en-us/articles/intel-architecture-and-processor-identification-with-cpuid-model-and-family-numbers */ enum CpuMicroarch { UnknownCpu, IntelMerom, IntelPenryn, IntelNehalem, IntelWestmere, IntelSandyBridge, IntelIvyBridge, IntelHaswell, IntelBroadwell, IntelSkylake }; struct PmuConfig { CpuMicroarch uarch; const char* name; unsigned rcb_cntr_event; unsigned rinsn_cntr_event; unsigned hw_intr_cntr_event; bool supported; }; // XXX please only edit this if you really know what you're doing. static const PmuConfig pmu_configs[] = { { IntelSkylake, "Intel Skylake", 0x5101c4, 0x5100c0, 0x5301cb, true }, { IntelBroadwell, "Intel Broadwell", 0x5101c4, 0x5100c0, 0x5301cb, true }, { IntelHaswell, "Intel Haswell", 0x5101c4, 0x5100c0, 0x5301cb, true }, { IntelIvyBridge, "Intel Ivy Bridge", 0x5101c4, 0x5100c0, 0x5301cb, true }, { IntelSandyBridge, "Intel Sandy Bridge", 0x5101c4, 0x5100c0, 0x5301cb, true }, { IntelNehalem, "Intel Nehalem", 0x5101c4, 0x5100c0, 0x50011d, true }, { IntelWestmere, "Intel Westmere", 0x5101c4, 0x5100c0, 0x50011d, true }, { IntelPenryn, "Intel Penryn", 0, 0, 0, false }, { IntelMerom, "Intel Merom", 0, 0, 0, false }, }; static string lowercase(const string& s) { string c = s; transform(c.begin(), c.end(), c.begin(), ::tolower); return c; } /** * Return the detected, known microarchitecture of this CPU, or don't * return; i.e. never return UnknownCpu. 
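 *
 * The dispatch below keys on (eax & 0xF0FF0) from CPUID leaf 1, i.e. the
 * extended-model, family and model fields with the stepping nibble masked
 * off. For example eax == 0x306C3 (a Haswell part, stepping 3) maps to
 * 0x306C0 and thus IntelHaswell.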
*/ static CpuMicroarch get_cpu_microarch() { string forced_uarch = lowercase(Flags::get().forced_uarch); if (!forced_uarch.empty()) { for (size_t i = 0; i < array_length(pmu_configs); ++i) { const PmuConfig& pmu = pmu_configs[i]; string name = lowercase(pmu.name); if (name.npos != name.find(forced_uarch)) { LOG(info) << "Using forced uarch " << pmu.name; return pmu.uarch; } } FATAL() << "Forced uarch " << Flags::get().forced_uarch << " isn't known."; } unsigned int cpu_type, eax, ecx, edx; cpuid(CPUID_GETFEATURES, 0, &eax, &ecx, &edx); cpu_type = (eax & 0xF0FF0); switch (cpu_type) { case 0x006F0: case 0x10660: return IntelMerom; case 0x10670: case 0x106D0: return IntelPenryn; case 0x106A0: case 0x106E0: case 0x206E0: return IntelNehalem; case 0x20650: case 0x206C0: case 0x206F0: return IntelWestmere; case 0x206A0: case 0x206D0: case 0x306e0: return IntelSandyBridge; case 0x306A0: return IntelIvyBridge; case 0x306C0: case 0x306F0: case 0x40650: case 0x40660: return IntelHaswell; case 0x306D0: case 0x406F0: case 0x50660: return IntelBroadwell; case 0x506e0: return IntelSkylake; default: FATAL() << "CPU " << HEX(cpu_type) << " unknown."; return UnknownCpu; // not reached } } static void init_perf_event_attr(struct perf_event_attr* attr, perf_type_id type, unsigned config) { memset(attr, 0, sizeof(*attr)); attr->type = type; attr->size = sizeof(*attr); attr->config = config; // rr requires that its events count userspace tracee code // only. attr->exclude_kernel = 1; attr->exclude_guest = 1; } static void init_attributes() { if (attributes_initialized) { return; } attributes_initialized = true; CpuMicroarch uarch = get_cpu_microarch(); const PmuConfig* pmu = nullptr; for (size_t i = 0; i < array_length(pmu_configs); ++i) { if (uarch == pmu_configs[i].uarch) { pmu = &pmu_configs[i]; break; } } assert(pmu); if (!pmu->supported) { FATAL() << "Microarchitecture `" << pmu->name << "' currently unsupported."; } init_perf_event_attr(&ticks_attr, PERF_TYPE_RAW, pmu->rcb_cntr_event); init_perf_event_attr(&instructions_retired_attr, PERF_TYPE_RAW, pmu->rinsn_cntr_event); init_perf_event_attr(&hw_interrupts_attr, PERF_TYPE_RAW, pmu->hw_intr_cntr_event); // libpfm encodes the event with this bit set, so we'll do the // same thing. Unclear if necessary. hw_interrupts_attr.exclude_hv = 1; init_perf_event_attr(&page_faults_attr, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); } PerfCounters::PerfCounters(pid_t tid) : tid(tid), started(false) { init_attributes(); } static ScopedFd start_counter(pid_t tid, int group_fd, struct perf_event_attr* attr) { int fd = syscall(__NR_perf_event_open, attr, tid, -1, group_fd, 0); if (0 > fd) { if (errno == EACCES) { FATAL() << "Permission denied to use 'perf_event_open'; are perf events " "enabled? Try 'perf record'."; } if (errno == ENOENT) { FATAL() << "Unable to open performance counter with 'perf_event_open'; " "are perf events enabled? 
Try 'perf record'."; } FATAL() << "Failed to initialize counter"; } if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) { FATAL() << "Failed to start counter"; } return fd; } void PerfCounters::reset(Ticks ticks_period) { assert(ticks_period >= 0); stop(); struct perf_event_attr attr = ticks_attr; attr.sample_period = ticks_period; fd_ticks = start_counter(tid, -1, &attr); struct f_owner_ex own; own.type = F_OWNER_TID; own.pid = tid; if (fcntl(fd_ticks, F_SETOWN_EX, &own)) { FATAL() << "Failed to SETOWN_EX ticks event fd"; } if (fcntl(fd_ticks, F_SETFL, O_ASYNC) || fcntl(fd_ticks, F_SETSIG, PerfCounters::TIME_SLICE_SIGNAL)) { FATAL() << "Failed to make ticks counter ASYNC with sig" << signal_name(PerfCounters::TIME_SLICE_SIGNAL); } if (extra_perf_counters_enabled()) { int group_leader = fd_ticks; fd_hw_interrupts = start_counter(tid, group_leader, &hw_interrupts_attr); fd_instructions_retired = start_counter(tid, group_leader, &instructions_retired_attr); fd_page_faults = start_counter(tid, group_leader, &page_faults_attr); } started = true; } void PerfCounters::stop() { if (!started) { return; } started = false; fd_ticks.close(); fd_page_faults.close(); fd_hw_interrupts.close(); fd_instructions_retired.close(); } static int64_t read_counter(ScopedFd& fd) { int64_t val; ssize_t nread = read(fd, &val, sizeof(val)); assert(nread == sizeof(val)); return val; } Ticks PerfCounters::read_ticks() { return started ? read_counter(fd_ticks) : 0; } PerfCounters::Extra PerfCounters::read_extra() { assert(extra_perf_counters_enabled()); Extra extra; if (started) { extra.page_faults = read_counter(fd_page_faults); extra.hw_interrupts = read_counter(fd_hw_interrupts); extra.instructions_retired = read_counter(fd_instructions_retired); } else { memset(&extra, 0, sizeof(extra)); } return extra; } rr-4.1.0/src/PerfCounters.h000066400000000000000000000045531265436462100155360ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_PERF_COUNTERS_H_ #define RR_PERF_COUNTERS_H_ #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif #include #include #include #include "ScopedFd.h" #include "Ticks.h" /** * A class encapsulating the performance counters we use to monitor * each task during recording and replay. * * Normally we monitor a single kind of event that we use as a proxy * for progress, which we call "ticks". Currently this is the count of retired * conditional branches. We support dispatching a signal when the counter * reaches a particular value. * * When extra_perf_counters_enabled() returns true, we monitor additional * counters of interest. */ class PerfCounters { public: /** * Create performance counters monitoring the given task. */ PerfCounters(pid_t tid); ~PerfCounters() { stop(); } // Change this to 'true' to enable perf counters that may be interesting // for experimentation, but aren't necessary for core functionality. static bool extra_perf_counters_enabled() { return false; } /** * Reset all counter values to 0 and program the counters to send * TIME_SLICE_SIGNAL when 'ticks_period' tick events have elapsed. (In reality * the hardware triggers its interrupt some time after that.) * This must be called while the task is stopped, and it must be called * before the task is allowed to run again. */ void reset(Ticks ticks_period); /** * Close the perfcounter fds. They will be automatically reopened if/when * reset is called again. */ void stop(); /** * Read the current value of the ticks counter. 
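 *
 * A typical caller measures progress as a delta (illustrative only; the
 * |counters| object is hypothetical):
 *
 *   Ticks before = counters.read_ticks();
 *   // ... let the tracee run and stop again ...
 *   Ticks elapsed = counters.read_ticks() - before;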
*/ Ticks read_ticks(); /** * Return the fd we last used to monitor the ticks counter. */ int ticks_fd() const { return fd_ticks.get(); } /* This choice is fairly arbitrary; linux doesn't use SIGSTKFLT so we * hope that tracees don't either. */ enum { TIME_SLICE_SIGNAL = SIGSTKFLT }; struct Extra { Extra() : page_faults(0), hw_interrupts(0), instructions_retired(0) {} int64_t page_faults; int64_t hw_interrupts; int64_t instructions_retired; }; Extra read_extra(); private: pid_t tid; ScopedFd fd_ticks; ScopedFd fd_page_faults; ScopedFd fd_hw_interrupts; ScopedFd fd_instructions_retired; bool started; }; #endif /* RR_PERF_COUNTERS_H_ */ rr-4.1.0/src/PreserveFileMonitor.h000066400000000000000000000013651265436462100170600ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_PRESERVE_FILE_MONITOR_H_ #define RR_PRESERVE_FILE_MONITOR_H_ #include "FileMonitor.h" /** * A FileMonitor that does no monitoring of I/O itself, but prevents the file * descriptor from being closed (except via privileged syscalls made by * preload.c). * * The mere existence of this monitor disables syscall buffering for the fd, so * we get syscall traps for close() etc on the fd. Then * rec_prepare_syscall_arch calls allow_close() to check whether closing is * allowed. */ class PreserveFileMonitor : public FileMonitor { public: PreserveFileMonitor() {} virtual bool allow_close() { return false; } }; #endif /* RR_PRESERVE_FILE_MONITOR_H_ */ rr-4.1.0/src/PropertyTable.h000066400000000000000000000041221265436462100157030ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_PROPERTY_TABLE_H_ #define RR_PROPERTY_TABLE_H_ #include #include #include template class Property; /** * A PropertyTable is a heterogenously-typed set of property values. * It maps Property (effectively, property names) to values of * type T. It owns the property values. * Property values can be created, accessed and removed in a type-safe way * via the Property class. */ class PropertyTable { public: PropertyTable() {} ~PropertyTable() { for (auto& p : values) { p.first->destroy_property(p.second); } } private: template friend class Property; class PropertyBase { public: virtual void destroy_property(void* v) const = 0; }; std::unordered_map values; }; /** * Create an instance of this class to declare a property name. * The methods of this class call properties() on their Object parameter to * get the PropertyTable. 
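 *
 * A minimal usage sketch (MyObject is a hypothetical class; all it needs
 * is a properties() accessor returning its PropertyTable):
 *
 *   static Property<MyObject, int> hit_count;
 *   int& hits = hit_count.get_or_create(obj); // owned by obj's table
 *   ++hits;
 *   hit_count.remove(obj); // hands the value back as a unique_ptr<int>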
*/ template class Property : protected PropertyTable::PropertyBase { public: T& create(Object& o) const { assert(!get(o)); T* t = new T(); o.properties().values[this] = t; return *t; } T* get(Object& o) const { auto& properties = o.properties(); auto e = properties.values.find(this); if (e != properties.values.end()) { return static_cast(e->second); } return nullptr; } T& get_or_create(Object& o) const { T* t = get(o); if (t) { return *t; } return create(o); } std::unique_ptr remove(Object& o) const { auto& properties = o.properties(); auto e = properties.values.find(this); std::unique_ptr result; if (e != properties.values.end()) { result = std::unique_ptr(static_cast(e->second)); properties.values.erase(e); } return result; } protected: virtual void destroy_property(void* v) const { delete static_cast(v); } }; #endif /* RR_PROPERTY_TABLE_H_ */ rr-4.1.0/src/PsCommand.cc000066400000000000000000000056231265436462100151350ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include #include "Command.h" #include "main.h" #include "TraceStream.h" #include "TraceTaskEvent.h" using namespace std; class PsCommand : public Command { public: virtual int run(std::vector& args); protected: PsCommand(const char* name, const char* help) : Command(name, help) {} static PsCommand singleton; }; PsCommand PsCommand::singleton("ps", " rr ps []\n"); static void print_exec_cmd_line(const TraceTaskEvent& event, FILE* out) { bool first = true; for (auto& word : event.cmd_line()) { fprintf(out, "%s%s", first ? "" : " ", word.c_str()); first = false; } fprintf(out, "\n"); } static void update_tid_to_pid_map(std::map& tid_to_pid, const TraceTaskEvent& e) { if (e.is_fork()) { // Some kind of fork. This task is its own pid. tid_to_pid[e.tid()] = e.tid(); } else if (e.type() == TraceTaskEvent::CLONE) { // thread clone. Record thread's pid. tid_to_pid[e.tid()] = tid_to_pid[e.parent_tid()]; } } static int ps(const string& trace_dir, FILE* out) { TraceReader trace(trace_dir); fprintf(out, "PID\tPPID\tCMD\n"); vector events; while (trace.good()) { events.push_back(trace.read_task_event()); } if (events.empty() || events[0].type() != TraceTaskEvent::EXEC) { fprintf(stderr, "Invalid trace\n"); return 1; } std::map tid_to_pid; fprintf(out, "%d\t--\t", events[0].tid()); print_exec_cmd_line(events[0], out); tid_to_pid[events[0].tid()] = events[0].tid(); for (size_t i = 1; i < events.size(); ++i) { auto& e = events[i]; update_tid_to_pid_map(tid_to_pid, e); if (e.is_fork()) { fprintf(out, "%d\t%d\t", e.tid(), tid_to_pid[e.parent_tid()]); // Look ahead for an EXEC in one of this process' threads. std::map tmp_tid_to_pid = tid_to_pid; bool found_exec = false; for (size_t j = i + 1; j < events.size(); ++j) { auto& ej = events[j]; if (tmp_tid_to_pid[ej.tid()] == tmp_tid_to_pid[e.tid()] && ej.type() == TraceTaskEvent::EXEC) { print_exec_cmd_line(events[j], out); found_exec = true; break; } update_tid_to_pid_map(tmp_tid_to_pid, ej); if (ej.tid() == e.tid() && ej.type() == TraceTaskEvent::EXIT) { break; } } if (!found_exec) { // The main thread exited. All other threads must too, so there // is no more opportunity for e's pid to exec. 
fprintf(out, "(forked without exec)\n"); } } } return 0; } int PsCommand::run(std::vector& args) { while (parse_global_option(args)) { } string trace_dir; if (!parse_optional_trace_dir(args, &trace_dir)) { print_help(stderr); return 1; } return ps(trace_dir, stdout); } rr-4.1.0/src/RecordCommand.cc000066400000000000000000000166331265436462100157740ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "RecordCommand.h" #include #include #include "preload/preload_interface.h" #include "Flags.h" #include "kernel_metadata.h" #include "log.h" #include "main.h" #include "RecordSession.h" #include "util.h" using namespace std; RecordCommand RecordCommand::singleton( "record", " rr record [OPTION]... [exe-args]...\n" " -b, --force-syscall-buffer force the syscall buffer preload library\n" " to be used, even if that's probably a bad\n" " idea\n" " -c, --num-cpu-ticks= maximum number of 'CPU ticks' (currently \n" " retired conditional branches) to allow a \n" " task to run before interrupting it\n" " -e, --num-events= maximum number of events (syscall \n" " enter/exit, signal, CPU interrupt, ...) \n" " to allow a task before descheduling it\n" " -i, --ignore-signal= block from being delivered to \n" " tracees. Probably only useful for unit \n" " tests.\n" " -n, --no-syscall-buffer disable the syscall buffer preload \n" " library even if it would otherwise be used\n" " -u, --cpu-unbound allow tracees to run on any virtual CPU.\n" " Default is to bind to CPU 0. This option\n" " can cause replay divergence: use with\n" " caution.\n" " -v, --env=NAME=VALUE value to add to the environment of the\n" " tracee. There can be any number of these.\n"); struct RecordFlags { vector extra_env; /* Max counter value before the scheduler interrupts a tracee. */ Ticks max_ticks; /* Max number of trace events before the scheduler * de-schedules a tracee. */ TraceFrame::Time max_events; /* Whenever |ignore_sig| is pending for a tracee, decline to * deliver it. */ int ignore_sig; /* When true, use syscall buffering optimization during recording. */ bool use_syscall_buffer; /* True when tracee processes in record and replay are allowed * to run on any logical CPU. 
*/ bool cpu_unbound; RecordFlags() : max_ticks(Scheduler::DEFAULT_MAX_TICKS), max_events(Scheduler::DEFAULT_MAX_EVENTS), ignore_sig(0), use_syscall_buffer(true), cpu_unbound(false) {} }; static bool parse_record_arg(std::vector& args, RecordFlags& flags) { if (parse_global_option(args)) { return true; } static const OptionSpec options[] = { { 'b', "force-syscall-buffer", NO_PARAMETER }, { 'i', "ignore-signal", HAS_PARAMETER }, { 'c', "num-cpu-ticks", HAS_PARAMETER }, { 'e', "num-events", HAS_PARAMETER }, { 'n', "no-syscall-buffer", NO_PARAMETER }, { 'u', "cpu-unbound", NO_PARAMETER }, { 'v', "env", HAS_PARAMETER } }; ParsedOption opt; auto args_copy = args; if (!Command::parse_option(args_copy, options, &opt)) { return false; } switch (opt.short_name) { case 'b': flags.use_syscall_buffer = true; break; case 'c': if (!opt.verify_valid_int(1, INT64_MAX)) { return false; } flags.max_ticks = opt.int_value; break; case 'e': if (!opt.verify_valid_int(1, UINT32_MAX)) { return false; } flags.max_events = opt.int_value; ; break; case 'i': if (!opt.verify_valid_int(1, _NSIG - 1)) { return false; } flags.ignore_sig = opt.int_value; break; case 'n': flags.use_syscall_buffer = false; break; case 'u': flags.cpu_unbound = true; break; case 'v': flags.extra_env.push_back(opt.value); break; default: assert(0 && "Unknown option"); } args = args_copy; return true; } static bool term_request; /** * A terminating signal was received. Set the |term_request| bit to * terminate the trace at the next convenient point. * * If there's already a term request pending, then assume rr is wedged * and abort(). */ static void handle_SIGTERM(int sig) { if (term_request) { FATAL() << "Received termsig while an earlier one was pending. We're " "probably wedged."; } LOG(info) << "Received termsig " << signal_name(sig) << ", requesting shutdown ...\n"; term_request = true; } static void install_signal_handlers(void) { struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_handler = handle_SIGTERM; sigaction(SIGTERM, &sa, nullptr); sa.sa_handler = SIG_IGN; sigaction(SIGINT, &sa, nullptr); } static void setup_session_from_flags(RecordSession& session, const RecordFlags& flags) { session.scheduler().set_max_ticks(flags.max_ticks); session.scheduler().set_max_events(flags.max_events); session.set_ignore_sig(flags.ignore_sig); } static int record(const vector& args, const RecordFlags& flags) { LOG(info) << "Start recording..."; auto session = RecordSession::create( args, (flags.cpu_unbound ? RecordSession::CPU_UNBOUND : 0) | (flags.use_syscall_buffer ? 0 : RecordSession::DISABLE_SYSCALL_BUF), flags.extra_env); setup_session_from_flags(*session, flags); // Install signal handlers after creating the session, to ensure they're not // inherited by the tracee. install_signal_handlers(); RecordSession::RecordResult step_result; do { step_result = session->record_step(); } while (step_result.status == RecordSession::STEP_CONTINUE && !term_request); session->terminate_recording(); switch (step_result.status) { case RecordSession::STEP_CONTINUE: // SIGINT or something like that interrupted us. return 0x80 | SIGINT; case RecordSession::STEP_EXITED: return step_result.exit_code; case RecordSession::STEP_EXEC_FAILED: fprintf(stderr, "\n" "rr: error:\n" " Unexpected `write()' call from first tracee process.\n" " Most likely, the executable image `%s' is 64-bit, doesn't " "exist, or\n" " isn't in your $PATH. 
Terminating recording.\n" "\n", session->trace_writer().initial_exe().c_str()); return EX_NOINPUT; case RecordSession::STEP_PERF_COUNTERS_UNAVAILABLE: fprintf(stderr, "\n" "rr: internal recorder error:\n" " Performance counter doesn't seem to be working. Are " "you perhaps\n" " running rr in a VM but didn't enable perf-counter " "virtualization?\n"); return EX_UNAVAILABLE; default: assert(0 && "Unknown exit status"); return -1; } } int RecordCommand::run(std::vector& args) { if (getenv("RUNNING_UNDER_RR")) { fprintf(stderr, "rr: cannot run rr recording under rr. Exiting.\n"); return 1; } RecordFlags flags; while (parse_record_arg(args, flags)) { } if (!verify_not_option(args) || args.size() == 0) { print_help(stderr); return 1; } assert_prerequisites(flags.use_syscall_buffer); check_performance_settings(); return record(args, flags); } rr-4.1.0/src/RecordCommand.h000066400000000000000000000007321265436462100156270ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_RECORD_COMMAND_H_ #define RR_RECORD_COMMAND_H_ #include "Command.h" class RecordCommand : public Command { public: virtual int run(std::vector& args); static RecordCommand* get() { return &singleton; } protected: RecordCommand(const char* name, const char* help) : Command(name, help) {} static RecordCommand singleton; }; #endif // RR_RECORD_COMMAND_H_ rr-4.1.0/src/RecordSession.cc000066400000000000000000001532301265436462100160340ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "RecordSession" #include "RecordSession.h" #include #include #include #include "AutoRemoteSyscalls.h" #include "kernel_metadata.h" #include "log.h" #include "record_signal.h" #include "record_syscall.h" #include "seccomp-bpf.h" #include "task.h" // Undef si_addr_lsb since it's an alias for a field name that doesn't exist, // and we need to use the actual field name. #ifdef si_addr_lsb #undef si_addr_lsb #endif using namespace rr; using namespace std; /** * Create a pulseaudio client config file with shm disabled. That may * be the cause of a mysterious divergence. Return an envpair to set * in the tracee environment. */ static string create_pulseaudio_config() { // TODO let PULSE_CLIENTCONFIG env var take precedence. static const char pulseaudio_config_path[] = "/etc/pulse/client.conf"; if (access(pulseaudio_config_path, R_OK)) { // Assume pulseaudio isn't installed return ""; } char tmp[] = "/tmp/rr-pulseaudio-client-conf-XXXXXX"; int fd = mkstemp(tmp); fcntl(fd, F_SETFD, FD_CLOEXEC); unlink(tmp); // The fd is deliberately leaked so that the /proc/fd link below works // indefinitely. But we stop it leaking into tracee processes. stringstream procfile; procfile << "/proc/" << getpid() << "/fd/" << fd; // Running cp passing the procfile path under Docker fails for some // odd filesystem-related reason, so just read/write the contents. 
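  // The loop below is a plain read(2)/write(2) copy into the unlinked
  // tempfile, which stays reachable via the /proc/<pid>/fd path built
  // above for as long as rr holds the deliberately-leaked fd.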
int pulse_config_fd = open(pulseaudio_config_path, O_RDONLY, 0); if (pulse_config_fd < 0) { FATAL() << "Failed to open pulseaudio config file: '" << pulseaudio_config_path << "'"; } char buf[BUFSIZ]; while (true) { ssize_t size = read(pulse_config_fd, buf, BUFSIZ); if (size == 0) { break; } else if (size < 0) { FATAL() << "Failed to read pulseaudio config file"; } if (write(fd, buf, size) != size) { FATAL() << "Failed to write temp pulseaudio config file to " << procfile.str(); } } close(pulse_config_fd); char disable_shm[] = "disable-shm = true\n"; ssize_t nwritten = write(fd, disable_shm, sizeof(disable_shm) - 1); if (nwritten != sizeof(disable_shm) - 1) { FATAL() << "Failed to append '" << disable_shm << "' to " << procfile.str(); } stringstream envpair; envpair << "PULSE_CLIENTCONFIG=" << procfile.str(); return envpair.str(); } static int get_num_cpus() { int cpus = (int)sysconf(_SC_NPROCESSORS_ONLN); return cpus > 0 ? cpus : 1; } /** * Pick a CPU at random to bind to, unless --cpu-unbound has been given, * in which case we return -1. */ static int choose_cpu(uint32_t flags) { if (flags & RecordSession::CPU_UNBOUND) { return -1; } // Pin tracee tasks to logical CPU 0, both in // recording and replay. Tracees can see which HW // thread they're running on by asking CPUID, and we // don't have a way to emulate it yet. So if a tracee // happens to be scheduled on a different core in // recording than replay, it can diverge. (And // indeed, has been observed to diverge in practice, // in glibc.) // // Note that we will pin both the tracee processes *and* // the tracer process. This ends up being a tidy // performance win in certain circumstances, // presumably due to cheaper context switching and/or // better interaction with CPU frequency scaling. return random() % get_num_cpus(); } template static remote_ptr mask_low_bit(remote_ptr p) { return p.as_int() & ~uintptr_t(1); } template static void record_robust_futex_change( Task* t, const typename Arch::robust_list_head& head, remote_ptr base) { if (base.is_null()) { return; } remote_ptr futex_void_ptr = base + head.futex_offset; auto futex_ptr = futex_void_ptr.cast(); // We can't just record the current futex value because at this point // in task exit the robust futex handling has not happened yet. So we have // to emulate what the kernel will do! bool ok = true; uint32_t val = t->read_mem(futex_ptr, &ok); if (!ok) { return; } if (pid_t(val & FUTEX_TID_MASK) != t->own_namespace_rec_tid) { return; } val = (val & FUTEX_WAITERS) | FUTEX_OWNER_DIED; t->record_local(futex_ptr, &val); } /** * Any user-space writes performed by robust futex handling are captured here. * They must be emulated during replay; the kernel will not do it for us * during replay because the TID value in each futex is the recorded * TID, not the actual TID of the dying task. */ template static void record_robust_futex_changes_arch(Task* t) { if (t->vm()->task_set().size() == 1) { // This address space is going away --- actually, has probably already // gone away. Any robust futex cleanup will not be observable. 
return; } auto head_ptr = t->robust_list().cast(); if (head_ptr.is_null()) { return; } ASSERT(t, t->robust_list_len() == sizeof(typename Arch::robust_list_head)); bool ok = true; auto head = t->read_mem(head_ptr, &ok); if (!ok) { return; } record_robust_futex_change(t, head, mask_low_bit(head.list_op_pending.rptr())); for (auto current = mask_low_bit(head.list.next.rptr()); current.as_int() != head_ptr.as_int();) { record_robust_futex_change(t, head, current); auto next = t->read_mem(current, &ok); if (!ok) { return; } current = mask_low_bit(next.next.rptr()); } } static void record_robust_futex_changes(Task* t) { RR_ARCH_FUNCTION(record_robust_futex_changes_arch, t->arch(), t); } /** * Return true if we handle a ptrace exit event for task t. When this returns * true, t has been deleted and cannot be referenced again. */ static bool handle_ptrace_exit_event(Task* t) { if (t->ptrace_event() != PTRACE_EVENT_EXIT) { return false; } if (t->stable_exit) { LOG(debug) << "stable exit"; } else { LOG(warn) << "unstable exit; may misrecord CLONE_CHILD_CLEARTID memory race"; t->destabilize_task_group(); } record_robust_futex_changes(t); EventType ev = t->unstable ? EV_UNSTABLE_EXIT : EV_EXIT; t->record_event(Event(ev, NO_EXEC_INFO, t->arch())); t->record_session().trace_writer().write_task_event(TraceTaskEvent(t->tid)); delete t; return true; } static void handle_seccomp_traced_syscall( Task* t, RecordSession::StepState* step_state) { int syscallno = t->regs().original_syscallno(); if (syscallno < 0) { // negative syscall numbers after a SECCOMP event // are treated as "skip this syscall". There will be one syscall event // reported instead of two. So, record an enter-syscall event now // and treat the other event as the exit. t->emulate_syscall_entry(t->regs()); t->push_event(SyscallEvent(syscallno, t->arch())); ASSERT(t, EV_SYSCALL == t->ev().type()); t->ev().Syscall().state = ENTERING_SYSCALL; t->record_current_event(); // Don't continue yet. At the next iteration of record_step, we'll // enter syscall_state_changed and that will trigger a continue to // the syscall exit. step_state->continue_type = RecordSession::DONT_CONTINUE; } else { // The next continue needs to be a PTRACE_SYSCALL to observe // the enter-syscall event. step_state->continue_type = RecordSession::CONTINUE_SYSCALL; } } static void handle_seccomp_trap(Task* t, RecordSession::StepState* step_state, uint16_t seccomp_data) { int syscallno = t->regs().original_syscallno(); t->emulate_syscall_entry(t->regs()); if (!t->is_in_untraced_syscall()) { t->push_event(SyscallEvent(syscallno, t->arch())); ASSERT(t, EV_SYSCALL == t->ev().type()); t->ev().Syscall().state = ENTERING_SYSCALL; t->record_current_event(); } Registers r = t->regs(); // Use NativeArch here because different versions of system headers // have inconsistent field naming. union { NativeArch::siginfo_t native_api; siginfo_t linux_api; } si; memset(&si, 0, sizeof(si)); si.native_api.si_signo = SIGSYS; si.native_api.si_errno = seccomp_data; si.native_api.si_code = SYS_SECCOMP; switch (r.arch()) { case x86: si.native_api._sifields._sigsys._arch = AUDIT_ARCH_I386; break; case x86_64: si.native_api._sifields._sigsys._arch = AUDIT_ARCH_X86_64; break; default: assert(0 && "Unknown architecture"); break; } si.native_api._sifields._sigsys._syscall = syscallno; // We don't set call_addr here, because the current ip() might not be the // ip() at which we deliver the signal, and they must match. 
In particular // this event might be triggered during syscallbuf processing but delivery // delayed until we exit the syscallbuf code. t->stash_synthetic_sig(si.linux_api); // Tests show that the current registers are preserved (on x86, eax/rax // retains the syscall number). r.set_syscallno(syscallno); // Cause kernel processing to skip the syscall r.set_original_syscallno(-1); t->set_regs(r); // Don't continue yet. At the next iteration of record_step, if we // recorded the syscall-entry we'll enter syscall_state_changed and // that will trigger a continue to the syscall exit. step_state->continue_type = RecordSession::DONT_CONTINUE; } static void handle_seccomp_errno(Task* t, RecordSession::StepState* step_state, uint16_t seccomp_data) { int syscallno = t->regs().original_syscallno(); t->emulate_syscall_entry(t->regs()); if (!t->is_in_untraced_syscall()) { t->push_event(SyscallEvent(syscallno, t->arch())); ASSERT(t, EV_SYSCALL == t->ev().type()); t->ev().Syscall().state = ENTERING_SYSCALL; t->record_current_event(); } Registers r = t->regs(); // Cause kernel processing to skip the syscall r.set_original_syscallno(-1); r.set_syscall_result(-seccomp_data); t->set_regs(r); // Don't continue yet. At the next iteration of record_step, if we // recorded the syscall-entry we'll enter syscall_state_changed and // that will trigger a continue to the syscall exit. step_state->continue_type = RecordSession::DONT_CONTINUE; } bool RecordSession::handle_ptrace_event(Task* t, StepState* step_state) { int event = t->ptrace_event(); if (event == PTRACE_EVENT_NONE) { return false; } LOG(debug) << " " << t->tid << ": handle_ptrace_event " << event << ": event " << t->ev(); switch (event) { case PTRACE_EVENT_SECCOMP_OBSOLETE: case PTRACE_EVENT_SECCOMP: { t->seccomp_bpf_enabled = true; uint16_t seccomp_data = t->get_ptrace_eventmsg_seccomp_data(); if (seccomp_data == SECCOMP_RET_DATA) { handle_seccomp_traced_syscall(t, step_state); } else { uint32_t real_result = seccomp_filter_rewriter().map_filter_data_to_real_result( seccomp_data); uint16_t real_result_data = real_result & SECCOMP_RET_DATA; switch (real_result & SECCOMP_RET_ACTION) { case SECCOMP_RET_TRAP: handle_seccomp_trap(t, step_state, real_result_data); break; case SECCOMP_RET_ERRNO: handle_seccomp_errno(t, step_state, real_result_data); break; default: ASSERT(t, false) << "Seccomp result not handled"; break; } } break; } case PTRACE_EVENT_CLONE: { remote_ptr stack; remote_ptr* ptid_not_needed = nullptr; remote_ptr tls; remote_ptr ctid; extract_clone_parameters(t, &stack, ptid_not_needed, &tls, &ctid); // fork can never share these resources, only // copy, so the flags here aren't meaningful for it. unsigned long flags_arg = is_clone_syscall(t->regs().original_syscallno(), t->arch()) ? t->regs().arg1() : 0; // Ideally we'd just use t->get_ptrace_eventmsg_pid() here, but // kernels failed to translate that value from other pid namespaces to // our pid namespace until June 2014: // https://github.com/torvalds/linux/commit/4e52365f279564cef0ddd41db5237f0471381093 pid_t new_tid; if (flags_arg & CLONE_THREAD) { new_tid = t->find_newborn_thread(); } else { new_tid = t->find_newborn_child_process(); } Task* new_task = clone(t, clone_flags_to_task_flags(flags_arg), stack, tls, ctid, new_tid); rec_set_syscall_new_task(t, new_task); { AutoRemoteSyscalls remote(new_task); new_task->own_namespace_rec_tid = remote.infallible_syscall( syscall_number_for_gettid(new_task->arch())); } // Skip past the ptrace event. 
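// (CONTINUE_SYSCALL resumes the parent with PTRACE_SYSCALL -- see
// task_continue() -- so the next stop is the exit of the clone(), where
// its results are recorded in the usual way.)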
step_state->continue_type = CONTINUE_SYSCALL; break; } case PTRACE_EVENT_FORK: { pid_t new_tid = t->find_newborn_child_process(); Task* new_task = clone(t, 0, nullptr, nullptr, nullptr, new_tid); rec_set_syscall_new_task(t, new_task); // Skip past the ptrace event. step_state->continue_type = CONTINUE_SYSCALL; break; } case PTRACE_EVENT_EXEC: /* The initial tracee, if it's still around, is now * for sure not running in the initial rr address * space, so we can unblock signals. */ can_deliver_signals = true; t->post_exec(); // Skip past the ptrace event. step_state->continue_type = CONTINUE_SYSCALL; break; case PTRACE_EVENT_STOP: last_task_switchable = ALLOW_SWITCH; step_state->continue_type = DONT_CONTINUE; break; // We map vfork() to fork() so we don't expect to see these: case PTRACE_EVENT_VFORK: case PTRACE_EVENT_VFORK_DONE: // This is handled separately: case PTRACE_EVENT_EXIT: default: ASSERT(t, false) << "Unhandled ptrace event " << ptrace_event_name(event) << "(" << event << ")"; break; } return true; } static void debug_exec_state(const char* msg, Task* t) { LOG(debug) << msg << ": status=" << HEX(t->status()) << " pevent=" << t->ptrace_event(); } void RecordSession::task_continue(Task* t, const StepState& step_state) { ASSERT(t, step_state.continue_type != DONT_CONTINUE); bool may_restart = t->at_may_restart_syscall(); if (step_state.continue_sig) { LOG(debug) << " delivering " << signal_name(step_state.continue_sig) << " to " << t->tid; } if (may_restart && t->seccomp_bpf_enabled) { LOG(debug) << " PTRACE_SYSCALL to possibly-restarted " << t->ev(); } if (!t->vm()->first_run_event()) { t->vm()->set_first_run_event(trace_writer().time()); } TicksRequest max_ticks = (TicksRequest)t->record_session().scheduler().max_ticks(); if (!t->seccomp_bpf_enabled || CONTINUE_SYSCALL == step_state.continue_type || may_restart) { /* We won't receive PTRACE_EVENT_SECCOMP events until * the seccomp filter is installed by the * syscall_buffer lib in the child, therefore we must * record in the traditional way (with PTRACE_SYSCALL) * until it is installed. */ t->resume_execution(RESUME_SYSCALL, RESUME_NONBLOCKING, step_state.continue_type == CONTINUE_SYSCALL ? RESUME_NO_TICKS : max_ticks, step_state.continue_sig); } else { /* When the seccomp filter is on, instead of capturing * syscalls by using PTRACE_SYSCALL, the filter will * generate the ptrace events. This means we allow the * process to run using PTRACE_CONT, and rely on the * seccomp filter to generate the special * PTRACE_EVENT_SECCOMP event once a syscall happens. * This event is handled here by simply allowing the * process to continue to the actual entry point of * the syscall (using cont_syscall_block()) and then * using the same logic as before. */ t->resume_execution(RESUME_CONT, RESUME_NONBLOCKING, max_ticks, step_state.continue_sig); } } /** * Step |t| forward until the tracee syscall that disarms the desched * event. If a signal becomes pending in the interim, we stash it. * This allows the caller to deliver the signal after this returns. * (In reality the desched event will already have been disarmed before we * enter this function.) */ static void advance_to_disarm_desched_syscall(Task* t) { int old_sig = 0; LOG(debug) << "desched: DISARMING_DESCHED_EVENT"; /* TODO: send this through main loop. */ /* TODO: mask off signals and avoid this loop. 
*/ do { t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_UNLIMITED_TICKS); /* We can safely ignore TIME_SLICE_SIGNAL while trying to * reach the disarm-desched ioctl: once we reach it, * the desched'd syscall will be "done" and the tracee * will be at a preemption point. In fact, we *want* * to ignore this signal. Syscalls like read() can * have large buffers passed to them, and we have to * copy-out the buffered out data to the user's * buffer. This happens in the interval where we're * reaching the disarm-desched ioctl, so that code is * susceptible to receiving TIME_SLICE_SIGNAL. */ int sig = t->pending_sig(); if (PerfCounters::TIME_SLICE_SIGNAL == sig) { continue; } // We should not receive SYSCALLBUF_DESCHED_SIGNAL since it should already // have been disarmed. ASSERT(t, SYSCALLBUF_DESCHED_SIGNAL != sig); if (sig && sig == old_sig) { LOG(debug) << " coalescing pending " << signal_name(sig); continue; } if (sig) { LOG(debug) << " " << signal_name(sig) << " now pending"; t->stash_sig(); } } while (!t->is_disarm_desched_event_syscall()); // Exit the syscall. t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS); } /** * |t| is at a desched event and some relevant aspect of its state * changed. (For now, changes except the original desched'd syscall * being restarted.) */ void RecordSession::desched_state_changed(Task* t) { LOG(debug) << "desched: IN_SYSCALL"; /* We need to ensure that the syscallbuf code doesn't * try to commit the current record; we've already * recorded that syscall. The following event sets * the abort-commit bit. */ t->syscallbuf_hdr->abort_commit = 1; t->record_event(Event(EV_SYSCALLBUF_ABORT_COMMIT, NO_EXEC_INFO, t->arch())); advance_to_disarm_desched_syscall(t); t->pop_desched(); /* The tracee has just finished sanity-checking the * aborted record, and won't touch the syscallbuf * during this (aborted) transaction again. So now * is a good time for us to reset the record counter. */ t->delay_syscallbuf_reset = false; ASSERT(t, t->syscallbuf_hdr); // Run the syscallbuf exit hook. This ensures we'll be able to reset // the syscallbuf before trying to buffer another syscall. t->syscallbuf_hdr->notify_on_syscall_hook_exit = true; // We were just descheduled for potentially a long // time, and may have just had a signal become // pending. Ensure we get another chance to run. last_task_switchable = PREVENT_SWITCH; } static void syscall_not_restarted(Task* t) { LOG(debug) << " " << t->tid << ": popping abandoned interrupted " << t->ev() << "; pending events:"; #ifdef DEBUGTAG t->log_pending_events(); #endif t->pop_syscall_interruption(); t->record_event( Event(EV_INTERRUPTED_SYSCALL_NOT_RESTARTED, NO_EXEC_INFO, t->arch())); } /** * "Thaw" a frozen interrupted syscall if |t| is restarting it. * Return true if a syscall is indeed restarted. * * A postcondition of this function is that |t->ev| is no longer a * syscall interruption, whether or whether not a syscall was * restarted. 
*/ static bool maybe_restart_syscall(Task* t) { if (is_restart_syscall_syscall(t->regs().original_syscallno(), t->arch())) { LOG(debug) << " " << t->tid << ": SYS_restart_syscall'ing " << t->ev(); } if (t->is_syscall_restart()) { t->ev().transform(EV_SYSCALL); Registers regs = t->regs(); regs.set_original_syscallno(t->ev().Syscall().regs.original_syscallno()); t->set_regs(regs); return true; } if (EV_SYSCALL_INTERRUPTION == t->ev().type()) { syscall_not_restarted(t); } return false; } /** * After a SYS_sigreturn "exit" of task |t| with return value |ret|, * check to see if there's an interrupted syscall that /won't/ be * restarted, and if so, pop it off the pending event stack. */ static void maybe_discard_syscall_interruption(Task* t, intptr_t ret) { int syscallno; if (EV_SYSCALL_INTERRUPTION != t->ev().type()) { /* We currently don't track syscalls interrupted with * ERESTARTSYS or ERESTARTNOHAND, so it's possible for * a sigreturn not to affect the event stack. */ LOG(debug) << " (no interrupted syscall to retire)"; return; } syscallno = t->ev().Syscall().number; if (0 > ret) { syscall_not_restarted(t); } else { ASSERT(t, syscallno == ret) << "Interrupted call was " << t->syscall_name(syscallno) << " and sigreturn claims to be restarting " << t->syscall_name(ret); } } /** * Copy the registers used for syscall arguments (not including * syscall number) from |from| to |to|. */ static void copy_syscall_arg_regs(Registers* to, const Registers& from) { to->set_arg1(from.arg1()); to->set_arg2(from.arg2()); to->set_arg3(from.arg3()); to->set_arg4(from.arg4()); to->set_arg5(from.arg5()); to->set_arg6(from.arg6()); } void RecordSession::syscall_state_changed(Task* t, StepState* step_state) { switch (t->ev().Syscall().state) { case ENTERING_SYSCALL: { debug_exec_state("EXEC_SYSCALL_ENTRY", t); if (!t->ev().Syscall().is_restart) { /* Save a copy of the arg registers so that we * can use them to detect later restarted * syscalls, if this syscall ends up being * restarted. We have to save the registers * in this rather awkward place because we * need the original registers; the restart * (if it's not a SYS_restart_syscall restart) * will use the original registers. */ t->ev().Syscall().regs = t->regs(); } last_task_switchable = rec_prepare_syscall(t); debug_exec_state("after cont", t); t->ev().Syscall().state = PROCESSING_SYSCALL; // Resume the syscall execution in the kernel context. step_state->continue_type = CONTINUE_SYSCALL; if (t->session().can_validate() && Flags::get().check_cached_mmaps) { t->vm()->verify(t); } if (t->desched_rec() && t->is_in_untraced_syscall() && t->ev().Syscall().is_restart && t->has_stashed_sig()) { // We have a signal to deliver but we're about to restart an untraced // syscall that may block and the desched event has been disarmed. // Rearm the desched event so if the syscall blocks, it will be // interrupted and we'll have a chance to deliver our signal. arm_desched_event(t); } return; } case PROCESSING_SYSCALL: debug_exec_state("EXEC_IN_SYSCALL", t); // Linux kicks tasks out of syscalls before delivering // signals. 
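// (A signal that interrupts a blocked syscall first forces the syscall to
// exit with an -ERESTART* error; only after that does the kernel report
// the signal-stop. So a pending signal here would mean we missed a stop.)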
ASSERT(t, !t->pending_sig()) << "Signal " << signal_name(t->pending_sig()) << " pending while in syscall???"; t->ev().Syscall().state = EXITING_SYSCALL; step_state->continue_type = DONT_CONTINUE; return; case EXITING_SYSCALL: { debug_exec_state("EXEC_SYSCALL_DONE", t); assert(t->pending_sig() == 0); int syscallno = t->ev().Syscall().number; intptr_t retval = t->regs().syscall_result_signed(); if (t->desched_rec()) { // If we enabled the desched event above, disable it. disarm_desched_event(t); // Record storing the return value in the syscallbuf record, where // we expect to find it during replay. auto child_rec = ((t->syscallbuf_child + 1).cast() + t->syscallbuf_hdr->num_rec_bytes) .cast(); int64_t ret = retval; t->record_local(REMOTE_PTR_FIELD(child_rec, ret), &ret); } // sigreturn is a special snowflake, because it // doesn't actually return. Instead, it undoes the // setup for signal delivery, which possibly includes // preparing the tracee for a restart-syscall. So we // take this opportunity to possibly pop an // interrupted-syscall event. if (is_sigreturn(syscallno, t->arch())) { ASSERT(t, t->regs().original_syscallno() == -1); t->record_current_event(); t->pop_syscall(); // We've finished processing this signal now. t->pop_signal_handler(); t->record_event(Event(EV_EXIT_SIGHANDLER, NO_EXEC_INFO, t->arch())); maybe_discard_syscall_interruption(t, retval); if (EV_DESCHED == t->ev().type()) { LOG(debug) << " exiting desched critical section"; desched_state_changed(t); } // XXX probably not necessary to leave the tracee unswitchable return; } LOG(debug) << " original_syscallno:" << t->regs().original_syscallno() << " (" << t->syscall_name(syscallno) << "); return val:" << t->regs().syscall_result(); /* a syscall_restart ending is equivalent to the * restarted syscall ending */ if (t->ev().Syscall().is_restart) { LOG(debug) << " exiting restarted " << t->syscall_name(syscallno); } /* TODO: is there any reason a restart_syscall can't * be interrupted by a signal and itself restarted? */ bool may_restart = !is_restart_syscall_syscall(syscallno, t->arch()) // SYS_pause is either interrupted or // never returns. It doesn't restart. && !is_pause_syscall(syscallno, t->arch()) && t->regs().syscall_may_restart(); /* no need to process the syscall in case its * restarted this will be done in the exit from the * restart_syscall */ if (!may_restart) { rec_process_syscall(t); if (t->session().can_validate() && Flags::get().check_cached_mmaps) { t->vm()->verify(t); } } else { LOG(debug) << " may restart " << t->syscall_name(syscallno) << " (from retval " << retval << ")"; rec_prepare_restart_syscall(t); /* If we may restart this syscall, we've most * likely fudged some of the argument * registers with scratch pointers. We don't * want to record those fudged registers, * because scratch doesn't exist in replay. * So cover our tracks here. */ Registers r = t->regs(); copy_syscall_arg_regs(&r, t->ev().Syscall().regs); t->set_regs(r); } t->record_current_event(); /* If we're not going to restart this syscall, we're * done with it. But if we are, "freeze" it on the * event stack until the execution point where it * might be restarted. 
*/ if (!may_restart) { t->pop_syscall(); if (EV_DESCHED == t->ev().type()) { LOG(debug) << " exiting desched critical section"; desched_state_changed(t); } } else { t->ev().transform(EV_SYSCALL_INTERRUPTION); t->ev().Syscall().is_restart = true; } return; } default: FATAL() << "Unknown exec state " << t->ev().Syscall().state; } }
/** Check that the performance counters are working when the tracee reaches * its initial dummy write; if they are not, flag the failure in * |step_result|. */ void RecordSession::check_perf_counters_working(Task* t, RecordResult* step_result) { if (can_deliver_signals || !is_write_syscall(t->ev().Syscall().number, t->arch())) { return; } int fd = t->regs().arg1_signed(); if (-1 != fd && Flags::get().force_things) { LOG(warn) << "Unexpected write(" << fd << ") call"; return; } if (-1 != fd) { step_result->status = RecordSession::STEP_EXEC_FAILED; return; } Ticks ticks = t->tick_count(); LOG(debug) << "ticks on entry to dummy write: " << ticks; if (ticks == 0) { step_result->status = RecordSession::STEP_PERF_COUNTERS_UNAVAILABLE; return; } }
template <typename Arch> static void assign_sigval(typename Arch::sigval_t& to, const NativeArch::sigval_t& from) { // si_ptr/si_int are a union and we don't know which part is valid. // The only case where it matters is when we're mapping 64->32, in which // case we can just assign the ptr first (which is bigger) and then the // int (to be endian-independent). to.sival_ptr = from.sival_ptr.rptr(); to.sival_int = from.sival_int; }
/** * Take a NativeArch::siginfo_t& here instead of siginfo_t because different * versions of system headers have inconsistent field naming. */ template <typename Arch> static void setup_sigframe_siginfo_arch(Task* t, const NativeArch::siginfo_t& siginfo) { remote_ptr<typename Arch::siginfo_t> dest; switch (Arch::arch()) { case x86: { auto p = t->regs().sp().cast<typename Arch::unsigned_word>() + 2; dest = t->read_mem(p); break; } case x86_64: dest = t->regs().si(); break; default: assert(0 && "Unknown architecture"); break; } typename Arch::siginfo_t si = t->read_mem(dest); // Copying this structure field-by-field instead of just memcpy'ing // siginfo into si serves two purposes: it performs 64->32 conversion if // necessary, and ensures garbage in any holes in siginfo isn't copied to the // tracee.
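// (|si| was just read from the tracee above, so any union members or
// padding bytes not explicitly assigned below retain the tracee's
// original values.)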
si.si_signo = siginfo.si_signo; si.si_errno = siginfo.si_errno; si.si_code = siginfo.si_code; switch (siginfo.si_code) { case SI_USER: case SI_TKILL: si._sifields._kill.si_pid_ = siginfo._sifields._kill.si_pid_; si._sifields._kill.si_uid_ = siginfo._sifields._kill.si_uid_; break; case SI_QUEUE: case SI_MESGQ: si._sifields._rt.si_pid_ = siginfo._sifields._rt.si_pid_; si._sifields._rt.si_uid_ = siginfo._sifields._rt.si_uid_; assign_sigval(si._sifields._rt.si_sigval_, siginfo._sifields._rt.si_sigval_); break; case SI_TIMER: si._sifields._timer.si_overrun_ = siginfo._sifields._timer.si_overrun_; si._sifields._timer.si_tid_ = siginfo._sifields._timer.si_tid_; assign_sigval(si._sifields._timer.si_sigval_, siginfo._sifields._timer.si_sigval_); break; } switch (siginfo.si_signo) { case SIGCHLD: si._sifields._sigchld.si_pid_ = siginfo._sifields._sigchld.si_pid_; si._sifields._sigchld.si_uid_ = siginfo._sifields._sigchld.si_uid_; si._sifields._sigchld.si_status_ = siginfo._sifields._sigchld.si_status_; si._sifields._sigchld.si_utime_ = siginfo._sifields._sigchld.si_utime_; si._sifields._sigchld.si_stime_ = siginfo._sifields._sigchld.si_stime_; break; case SIGILL: case SIGBUS: case SIGFPE: case SIGSEGV: case SIGTRAP: si._sifields._sigfault.si_addr_ = siginfo._sifields._sigfault.si_addr_.rptr(); si._sifields._sigfault.si_addr_lsb_ = siginfo._sifields._sigfault.si_addr_lsb_; break; case SIGIO: si._sifields._sigpoll.si_band_ = siginfo._sifields._sigpoll.si_band_; si._sifields._sigpoll.si_fd_ = siginfo._sifields._sigpoll.si_fd_; break; case SIGSYS: si._sifields._sigsys._call_addr = siginfo._sifields._sigsys._call_addr.rptr(); si._sifields._sigsys._syscall = siginfo._sifields._sigsys._syscall; si._sifields._sigsys._arch = siginfo._sifields._sigsys._arch; break; } t->write_mem(dest, si); } static void setup_sigframe_siginfo(Task* t, const siginfo_t& siginfo) { RR_ARCH_FUNCTION(setup_sigframe_siginfo_arch, t->arch(), t, *reinterpret_cast(&siginfo)); } /** * Returns true if the signal should be delivered. * Returns false if this signal should not be delivered because another signal * occurred during delivery. */ static bool inject_signal(Task* t) { int sig = t->ev().Signal().siginfo.si_signo; /* Signal injection is tricky. Per the ptrace(2) man page, injecting * a signal while the task is not in a signal-stop is not guaranteed to work * (and indeed, we see that the kernel sometimes ignores such signals). * But some signals must be delayed until after the signal-stop that notified * us of them. * So, first we check if we're in a signal-stop that we can use to inject * a signal. Some (all?) SIGTRAP stops are *not* usable for signal injection. */ if (t->pending_sig() && t->pending_sig() != SIGTRAP) { LOG(debug) << " in signal-stop for " << signal_name(t->pending_sig()); } else { /* We're not in a usable signal-stop. Force a signal-stop by sending * a new signal with tgkill (as the ptrace(2) man page recommends). */ LOG(debug) << " maybe not in signal-stop; tgkill(" << signal_name(sig) << ")"; t->tgkill(sig); /* Now singlestep the task until we're in a signal-stop for the signal * we've just sent. We must absorb and forget that signal here since we * don't want it delivered to the task for real. 
*/ while (true) { auto old_ip = t->ip(); t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT, RESUME_NO_TICKS); ASSERT(t, old_ip == t->ip()); ASSERT(t, t->pending_sig()); if (t->pending_sig() == sig) { LOG(debug) << " stopped with signal " << signal_name(sig); break; } /* It's possible for other signals to arrive while we're trying to * get to the signal-stop for the signal we just sent. Stash them for * later delivery. */ if (t->pending_sig() == SYSCALLBUF_DESCHED_SIGNAL) { LOG(debug) << " stopped with signal " << signal_name(sig) << "; ignoring it and carrying on"; } else { LOG(debug) << " stopped with signal " << signal_name(sig) << "; stashing it and carrying on"; t->stash_sig(); } } /* We're now in a signal-stop (and for the right signal too, though that * doesn't really matter). */ } /* Now that we're in a signal-stop, we can inject our signal and advance * to the signal handler with one single-step. */ LOG(debug) << " injecting signal number " << t->ev().Signal().siginfo; t->set_siginfo(t->ev().Signal().siginfo); t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT, RESUME_NO_TICKS, sig); // It's been observed that when tasks enter // sighandlers, the singlestep operation above // doesn't retire any instructions; and // indeed, if an instruction could be retired, // this code wouldn't work. This also // cross-checks the sighandler information we // maintain in |t->sighandlers|. assert(!PerfCounters::extra_perf_counters_enabled() || 0 == t->hpc.read_extra().instructions_retired); if (t->pending_sig() == SIGSEGV) { // Constructing the signal handler frame must have failed. The kernel will // kill the process after this. Stash the signal and mark it as blocked so // we know to treat it as fatal when we inject it. t->stash_sig(); t->set_sig_blocked(SIGSEGV); return false; } ASSERT(t, t->pending_sig() == SIGTRAP); ASSERT(t, t->get_signal_user_handler(sig) == t->ip()); if (t->signal_handler_takes_siginfo(sig)) { // The kernel copied siginfo into userspace so it can pass a pointer to // the signal handler. Replace the contents of that siginfo with // the exact data we want to deliver. (We called Task::set_siginfo // above to set that data, but the kernel sanitizes the passed-in data // which wipes out certain fields; e.g. we can't set SI_KERNEL in si_code.) setup_sigframe_siginfo(t, t->ev().Signal().siginfo); } return true; } static bool is_fatal_signal(Task* t, int sig, SignalDeterministic deterministic) { signal_action action = default_action(sig); if (action != DUMP_CORE && action != TERMINATE) { // If the default action doesn't kill the process, it won't die. return false; } if (t->is_sig_ignored(sig)) { // Deterministic fatal signals can't be ignored. return deterministic == DETERMINISTIC_SIG; } if (!t->signal_has_user_handler(sig)) { // The default action is going to happen: killing the process. return true; } // If the signal's blocked, user handlers aren't going to run and the process // will die. return t->is_sig_blocked(sig); } /** * |t| is being delivered a signal, and its state changed. * * Return true if execution was incidentally resumed to a new event, * false otherwise. */ void RecordSession::signal_state_changed(Task* t, StepState* step_state) { int sig = t->ev().Signal().siginfo.si_signo; switch (t->ev().type()) { case EV_SIGNAL: { // This event is used by the replayer to advance to // the point of signal delivery. 
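// (Replay uses this frame's registers and tick count as the execution
// target to advance to before emulating the delivery steps recorded
// below.)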
t->record_current_event(); t->ev().transform(EV_SIGNAL_DELIVERY); ssize_t sigframe_size = 0; bool blocked = t->is_sig_blocked(sig); // If this is the signal delivered by a sigsuspend, then clear // sigsuspend_blocked_sigs to indicate that future signals are not // being delivered by sigsuspend. t->sigsuspend_blocked_sigs = nullptr; // If a signal is blocked but is still delivered (e.g. a synchronous // terminating signal such as SIGSEGV), user handlers do not run. if (t->signal_has_user_handler(sig) && !blocked) { LOG(debug) << " " << t->tid << ": " << signal_name(sig) << " has user handler"; if (!inject_signal(t)) { // Signal delivery isn't happening. Prepare to process the new // signal that aborted signal delivery. t->signal_delivered(sig); t->pop_event(EV_SIGNAL_DELIVERY); step_state->continue_type = DONT_CONTINUE; last_task_switchable = PREVENT_SWITCH; break; } // It's somewhat difficult engineering-wise to // compute the sigframe size at compile time, // and it can vary across kernel versions. So // this size is an overestimate of the real // size(s). The estimate was made by // comparing $sp before and after entering the // sighandler, for a sighandler that used the // main task stack. On linux 3.11.2, that // computed size was 1736 bytes, which is an // upper bound on the sigframe size. We don't // want to mess with this code much, so we // overapproximate the overapproximation and // round off to 2048. // // If this size becomes too small in the // future, and unit tests that use sighandlers // are run with checksumming enabled, then // they can catch errors here. sigframe_size = 2048; t->ev().transform(EV_SIGNAL_HANDLER); t->signal_delivered(sig); // We already continued! Don't continue now, and allow switching. step_state->continue_type = DONT_CONTINUE; last_task_switchable = ALLOW_SWITCH; } else { LOG(debug) << " " << t->tid << ": no user handler for " << signal_name(sig); // Don't do another task continue. We want to deliver the signal // as the next thing that the task does. step_state->continue_type = DONT_CONTINUE; // If we didn't set up the sighandler frame, we need // to ensure that this tracee is scheduled next so // that we can deliver the signal normally. We have // to do that because setting up the sighandler frame // is synchronous, but delivery otherwise is async. // But right after this, we may have to process some // syscallbuf state, so we can't let the tracee race // with us. last_task_switchable = PREVENT_SWITCH; } // We record this data regardless to simplify replay. If the addresses // are unmapped, write 0 bytes. t->record_remote_fallible(t->sp(), sigframe_size); // This event is used by the replayer to set up the // signal handler frame, or to record the resulting // state of the stepi if there wasn't a signal // handler. 
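// (If no user handler ran, sigframe_size is still 0, so the
// record_remote_fallible() call above recorded no stack bytes.)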
t->record_current_event(); break; } case EV_SIGNAL_DELIVERY: step_state->continue_sig = sig; t->signal_delivered(sig); if (is_fatal_signal(t, sig, t->ev().Signal().deterministic)) { LOG(warn) << "Delivered core-dumping signal; may misrecord " "CLONE_CHILD_CLEARTID memory race"; t->destabilize_task_group(); last_task_switchable = ALLOW_SWITCH; } t->pop_signal_delivery(); break; default: FATAL() << "Unhandled signal state " << t->ev().type(); break; } } bool RecordSession::handle_signal_event(Task* t, StepState* step_state) { int sig = t->pending_sig(); if (!sig) { return false; } if (!can_deliver_signals) { // If the initial tracee isn't prepared to handle // signals yet, then us ignoring the ptrace // notification here will have the side effect of // declining to deliver the signal. // // This doesn't really occur in practice, only in // tests that force a degenerately low time slice. LOG(warn) << "Dropping " << signal_name(t->pending_sig()) << " because it can't be delivered yet"; // No events to be recorded, so no syscallbuf updates // needed. return true; } if (is_deterministic_signal(t->get_siginfo()) || sig == SYSCALLBUF_DESCHED_SIGNAL) { // Don't stash these signals; deliver them immediately. // We don't want them to be reordered around other signals. siginfo_t siginfo = t->get_siginfo(); switch (handle_signal(t, &siginfo)) { case SIGNAL_PTRACE_STOP: // Emulated ptrace-stop. Don't run the task again yet. last_task_switchable = ALLOW_SWITCH; step_state->continue_type = DONT_CONTINUE; return true; case DEFER_SIGNAL: ASSERT(t, false) << "Can't defer deterministic or internal signals"; break; case SIGNAL_HANDLED: break; } return false; } if (sig == PerfCounters::TIME_SLICE_SIGNAL) { auto& si = t->get_siginfo(); /* This implementation will of course fall over if rr tries to * record itself. * * NB: we can't check that the ticks is >= the programmed * target, because this signal may have become pending before * we reset the HPC counters. There be a way to handle that * more elegantly, but bridge will be crossed in due time. * * We can't check that the fd matches t->hpc.ticks_fd() because this * signal could have been queued quite a long time ago and the PerfCounters * might have been stopped (and restarted!), perhaps even more than once, * since the signal was queued. possibly changing its fd. We could check * against all fds the PerfCounters have ever used, but that seems like * overkill. */ ASSERT(t, PerfCounters::TIME_SLICE_SIGNAL == si.si_signo && POLL_IN == si.si_code) << "Tracee is using SIGSTKFLT??? (code=" << si.si_code << ", fd=" << si.si_fd << ")"; } t->stash_sig(); return true; } /** * The execution of |t| has just been resumed, and it most likely has * a new event that needs to be processed. Prepare that new event. */ void RecordSession::runnable_state_changed(Task* t, RecordResult* step_result, bool can_consume_wait_status, StepState* step_state) { switch (t->ev().type()) { case EV_NOOP: t->pop_noop(); break; case EV_SEGV_RDTSC: t->record_current_event(); t->pop_event(t->ev().type()); break; case EV_SENTINEL: case EV_SIGNAL_HANDLER: case EV_SYSCALL_INTERRUPTION: if (!can_consume_wait_status) { return; } // We just entered a syscall. if (!maybe_restart_syscall(t)) { // Emit FLUSH_SYSCALLBUF if necessary before we do any patching work t->maybe_flush_syscallbuf(); if (t->vm()->monkeypatcher().try_patch_syscall(t)) { // Syscall was patched. Emit event and continue execution. 
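// (On replay, EV_PATCH_SYSCALL prompts the monkeypatcher to apply the
// same patch at the same point, keeping the replayed instruction stream
// identical to the recorded one.)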
t->record_event(Event(EV_PATCH_SYSCALL, NO_EXEC_INFO, t->arch())); break; } t->push_event(SyscallEvent(t->regs().original_syscallno(), t->arch())); } ASSERT(t, EV_SYSCALL == t->ev().type()); check_perf_counters_working(t, step_result); t->ev().Syscall().state = ENTERING_SYSCALL; t->record_current_event(); break; default: return; } } bool RecordSession::prepare_to_inject_signal(Task* t, StepState* step_state) { if (!t->has_stashed_sig() || !can_deliver_signals || step_state->continue_type != CONTINUE) { return false; } union { NativeArch::siginfo_t native_api; siginfo_t linux_api; } si; si.linux_api = t->peek_stash_sig(); if (si.linux_api.si_signo == get_ignore_sig()) { LOG(info) << "Declining to deliver " << signal_name(si.linux_api.si_signo) << " by user request"; t->pop_stash_sig(); return false; } if (si.linux_api.si_signo == SIGSYS && si.linux_api.si_code == SYS_SECCOMP) { // Set call_addr to the current ip(). We don't do this when synthesizing // the SIGSYS because the SIGSYS might be triggered during syscallbuf // processing but be delivered later at a // SYS_rrcall_notify_syscall_hook_exit. // Documentation says that si_call_addr is the address of the syscall // instruction, but in tests it's immediately after the syscall // instruction. auto& native_si = si.native_api; native_si._sifields._sigsys._call_addr = t->ip().to_data_ptr(); } switch (handle_signal(t, &si.linux_api)) { case SIGNAL_PTRACE_STOP: // Emulated ptrace-stop. Don't run the task again yet. last_task_switchable = ALLOW_SWITCH; LOG(debug) << "Signal " << si.linux_api.si_signo << ", emulating ptrace stop"; break; case DEFER_SIGNAL: LOG(debug) << "Signal " << si.linux_api.si_signo << " deferred"; // Leave signal on the stack and continue task execution. We'll try again // later. return false; case SIGNAL_HANDLED: LOG(debug) << "Signal " << si.linux_api.si_signo << " handled"; if (t->ev().type() == EV_SCHED) { // Allow switching after a SCHED. We'll flush the SCHED if and only // if we really do a switch. last_task_switchable = ALLOW_SWITCH; } break; } step_state->continue_type = DONT_CONTINUE; t->pop_stash_sig(); return true; } static string find_syscall_buffer_library() { string lib_path = exe_directory() + "../lib/"; string file_name = lib_path + SYSCALLBUF_LIB_FILENAME; if (access(file_name.c_str(), F_OK) != 0) { // File does not exist. Assume install put it in LD_LIBRARY_PATH. lib_path = ""; } return lib_path; } /*static*/ RecordSession::shr_ptr RecordSession::create( const vector& argv, uint32_t flags, const vector& extra_env) { // The syscallbuf library interposes some critical // external symbols like XShmQueryExtension(), so we // preload it whether or not syscallbuf is enabled. Indicate here whether // syscallbuf is enabled. 
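/* The preload library reads this variable during its startup to decide
 * whether to buffer syscalls. A minimal sketch of the expected check --
 * the real logic lives in src/preload/preload.c:
 *
 *   int buffer_enabled = getenv(SYSCALLBUF_ENABLED_ENV_VAR) != NULL;
 */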
if (flags & DISABLE_SYSCALL_BUF) { unsetenv(SYSCALLBUF_ENABLED_ENV_VAR); } else { setenv(SYSCALLBUF_ENABLED_ENV_VAR, "1", 1); ScopedFd fd("/proc/sys/kernel/perf_event_paranoid", O_RDONLY); if (fd.is_open()) { char buf[100]; ssize_t size = read(fd, buf, sizeof(buf) - 1); if (size >= 0) { buf[size] = 0; int val = atoi(buf); if (val > 1) { FATAL() << "rr needs /proc/sys/kernel/perf_event_paranoid <= 1, but " "it is " << val << ".\nChange it to 1, or use 'rr record -n' (slow)."; } } } } vector env; char** envp = environ; for (; *envp; ++envp) { env.push_back(*envp); } env.insert(env.end(), extra_env.begin(), extra_env.end()); char cwd[PATH_MAX] = ""; getcwd(cwd, sizeof(cwd)); // LD_PRELOAD the syscall interception lib string syscall_buffer_lib_path = find_syscall_buffer_library(); if (!syscall_buffer_lib_path.empty()) { string ld_preload = "LD_PRELOAD="; // Our preload lib *must* come first. We supply a placeholder which is // then mutated to the correct filename in Monkeypatcher::patch_after_exec. ld_preload += syscall_buffer_lib_path + SYSCALLBUF_LIB_FILENAME_PADDED; auto it = env.begin(); for (; it != env.end(); ++it) { if (it->find("LD_PRELOAD=") != 0) { continue; } // Honor old preloads too. This may cause // problems, but only in those libs, and // that's the user's problem. ld_preload += ":"; ld_preload += it->substr(it->find("=") + 1); break; } if (it == env.end()) { env.push_back(ld_preload); } else { *it = ld_preload; } } string env_pair = create_pulseaudio_config(); if (!env_pair.empty()) { env.push_back(env_pair); } env.push_back("RUNNING_UNDER_RR=1"); // Disable Gecko's "wait for gdb to attach on process crash" behavior, since // it is useless when running under rr. env.push_back("MOZ_GDB_SLEEP=0"); shr_ptr session(new RecordSession(argv, env, cwd, flags)); return session; } RecordSession::RecordSession(const std::vector& argv, const std::vector& envp, const string& cwd, uint32_t flags) : trace_out(argv, envp, cwd, choose_cpu(flags)), scheduler_(*this), last_recorded_task(nullptr), ignore_sig(0), last_task_switchable(PREVENT_SWITCH), use_syscall_buffer_(!(flags & DISABLE_SYSCALL_BUF)), can_deliver_signals(false) { last_recorded_task = Task::spawn(*this, trace_out); initial_task_group = last_recorded_task->task_group(); on_create(last_recorded_task); } RecordSession::RecordResult RecordSession::record_step() { RecordResult result; if (tasks().empty()) { result.status = STEP_EXITED; result.exit_code = initial_task_group->exit_code; return result; } result.status = STEP_CONTINUE; bool did_wait; Task* t = scheduler().get_next_thread(last_recorded_task, last_task_switchable, &did_wait); if (!t) { // The scheduler was waiting for some task to become active, but was // interrupted by a signal. Yield to our caller now to give the caller // a chance to do something triggered by the signal // (e.g. terminate the recording). return result; } if (last_recorded_task && last_recorded_task->ev().type() == EV_SCHED) { if (last_recorded_task != t) { // We did do a context switch, so record the SCHED event. Otherwise // we'll just discard it. last_recorded_task->record_current_event(); } last_recorded_task->pop_event(EV_SCHED); } last_recorded_task = t; // Have to disable context-switching until we know it's safe // to allow switching the context. last_task_switchable = PREVENT_SWITCH; LOG(debug) << "line " << t->trace_time() << ": Active task is " << t->tid << ". Events:"; #ifdef DEBUGTAG t->log_pending_events(); #endif if (handle_ptrace_exit_event(t)) { // t is dead and has been deleted. 
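// Clear the cached pointer immediately so nothing can dereference the
// freed Task.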
last_recorded_task = nullptr; return result; } if (t->unstable) { // Do not record non-ptrace-exit events for tasks in // an unstable exit. We can't replay them. LOG(debug) << "Task in unstable exit; " "refusing to record non-ptrace events"; last_task_switchable = ALLOW_SWITCH; return result; } StepState step_state(CONTINUE); if (!(did_wait && handle_ptrace_event(t, &step_state)) && !(did_wait && handle_signal_event(t, &step_state))) { runnable_state_changed(t, &result, did_wait, &step_state); if (result.status != STEP_CONTINUE || step_state.continue_type == DONT_CONTINUE) { return result; } switch (t->ev().type()) { case EV_DESCHED: desched_state_changed(t); break; case EV_SYSCALL: syscall_state_changed(t, &step_state); break; case EV_SIGNAL: case EV_SIGNAL_DELIVERY: signal_state_changed(t, &step_state); break; default: break; } } // We try to inject a signal if there's one pending; otherwise we continue // task execution. if (!prepare_to_inject_signal(t, &step_state) && step_state.continue_type != DONT_CONTINUE) { // Ensure that we aren't allowing switches away from a running task. // Only tasks blocked in a syscall can be switched away from, otherwise // we have races. ASSERT(t, last_task_switchable == PREVENT_SWITCH || t->unstable || t->may_be_blocked()); debug_exec_state("EXEC_START", t); task_continue(t, step_state); } return result; } void RecordSession::terminate_recording() { if (last_recorded_task) { last_recorded_task->maybe_flush_syscallbuf(); } LOG(info) << "Processing termination request ..."; for (auto& t : tasks()) { // Emit UNSTABLE_EXIT events so the debugger can stop on them. t.second->record_event( Event(EV_UNSTABLE_EXIT, NO_EXEC_INFO, t.second->arch())); } LOG(info) << " recording final TRACE_TERMINATION event ..."; TraceFrame frame(trace_out.time(), last_recorded_task ? last_recorded_task->tid : 0, Event(EV_TRACE_TERMINATION, NO_EXEC_INFO, RR_NATIVE_ARCH), last_recorded_task ? last_recorded_task->tick_count() : 0); trace_out.write_frame(frame); trace_out.close(); } void RecordSession::on_create(Task* t) { Session::on_create(t); scheduler().on_create(t); } void RecordSession::on_destroy(Task* t) { scheduler().on_destroy(t); Session::on_destroy(t); } rr-4.1.0/src/RecordSession.h000066400000000000000000000101151265436462100156700ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_RECORD_SESSION_H_ #define RR_RECORD_SESSION_H_ #include #include #include "Scheduler.h" #include "SeccompFilterRewriter.h" #include "Session.h" #include "task.h" #include "TraceFrame.h" /** Encapsulates additional session state related to recording. */ class RecordSession : public Session { public: typedef std::shared_ptr shr_ptr; /** * Create a recording session for the initial command line |argv|. */ enum { DISABLE_SYSCALL_BUF = 0x01, CPU_UNBOUND = 0x02 }; static shr_ptr create( const std::vector& argv, uint32_t flags = 0, const std::vector& extra_env = std::vector()); bool use_syscall_buffer() const { return use_syscall_buffer_; } void set_ignore_sig(int ignore_sig) { this->ignore_sig = ignore_sig; } int get_ignore_sig() const { return ignore_sig; } enum RecordStatus { // Some execution was recorded. record_step() can be called again. STEP_CONTINUE, // All tracees are dead. record_step() should not be called again. STEP_EXITED, // Initial exec of the tracee failed. STEP_EXEC_FAILED, // Required performance counter features not detected. 
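// (Reported when the tracee reaches its initial dummy write with a zero
// tick count; see check_perf_counters_working() in RecordSession.cc.)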
STEP_PERF_COUNTERS_UNAVAILABLE }; struct RecordResult { RecordStatus status; // When status == STEP_EXITED int exit_code; }; /** * Record some tracee execution. * This may block. If blocking is interrupted by a signal, will return * STEP_CONTINUE. * Typically you'd call this in a loop until it returns something other than * STEP_CONTINUE. * Note that when this returns, some tasks may be running (not in a ptrace- * stop). In particular, up to one task may be executing user code and any * number of tasks may be blocked in syscalls. */ RecordResult record_step(); /** * Flush buffers and write a termination record to the trace. Don't call * record_step() after this. */ void terminate_recording(); virtual RecordSession* as_record() { return this; } TraceWriter& trace_writer() { return trace_out; } virtual void on_destroy(Task* t); Scheduler& scheduler() { return scheduler_; } SeccompFilterRewriter& seccomp_filter_rewriter() { return seccomp_filter_rewriter_; } enum ContinueType { DONT_CONTINUE = 0, CONTINUE, CONTINUE_SYSCALL }; struct StepState { // Continue with this continuation type. ContinueType continue_type; // If continuing, inject this signal int continue_sig; StepState(ContinueType continue_type) : continue_type(continue_type), continue_sig(0) {} }; private: RecordSession(const std::vector& argv, const std::vector& envp, const std::string& cwd, uint32_t flags); virtual void on_create(Task* t); void check_perf_counters_working(Task* t, RecordResult* step_result); bool handle_ptrace_event(Task* t, StepState* step_state); bool handle_signal_event(Task* t, StepState* step_state); void runnable_state_changed(Task* t, RecordResult* step_result, bool can_consume_wait_status, StepState* step_state); void signal_state_changed(Task* t, StepState* step_state); void syscall_state_changed(Task* t, StepState* step_state); void desched_state_changed(Task* t); bool prepare_to_inject_signal(Task* t, StepState* step_state); void task_continue(Task* t, const StepState& step_state); TraceWriter trace_out; Scheduler scheduler_; Task* last_recorded_task; TaskGroup::shr_ptr initial_task_group; SeccompFilterRewriter seccomp_filter_rewriter_; int ignore_sig; Switchable last_task_switchable; bool use_syscall_buffer_; /* True when it's safe to deliver signals, namely, when the initial * tracee has exec()'d the tracee image. Before then, the address * space layout will not be the same during replay as recording, so * replay won't be able to find the right execution point to deliver * the signal. */ bool can_deliver_signals; }; #endif // RR_RECORD_SESSION_H_ rr-4.1.0/src/Registers.cc000066400000000000000000000546231265436462100152270ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "registers" #include "Registers.h" #include #include #include #include #include #include "log.h" #include "task.h" using namespace rr; using namespace std; struct RegisterValue { // The name of this register. const char* name; // The offsetof the register in user_regs_struct. size_t offset; // The size of the register. 0 means we cannot read it. size_t nbytes; // Mask to be applied to register values prior to comparing them. Will // typically be ((1 << nbytes) - 1), but some registers may have special // comparison semantics. 
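// A mask of 0 skips the comparison entirely (used below for several
// segment registers and for orig_eax/orig_rax, which get special
// handling). The core check in compare_registers_core() amounts to:
//
//   if ((val1 ^ val2) & comparison_mask) { /* mismatch */ }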
uint64_t comparison_mask; constexpr RegisterValue() : name(nullptr), offset(0), nbytes(0), comparison_mask(0) {} RegisterValue(const char* name_, size_t offset_, size_t nbytes_) : name(name_), offset(offset_), nbytes(nbytes_) { comparison_mask = mask_for_nbytes(nbytes_); } RegisterValue(const char* name_, size_t offset_, size_t nbytes_, uint64_t comparison_mask_) : name(name_), offset(offset_), nbytes(nbytes_), comparison_mask(comparison_mask_) { // Ensure no bits are set outside of the register's bitwidth. assert((comparison_mask_ & ~mask_for_nbytes(nbytes_)) == 0); } // Returns a pointer to the register in |regs| represented by |offset|. // |regs| is assumed to be a pointer to the user_struct_regs for the // appropriate architecture. void* pointer_into(void* regs) { return static_cast(regs) + offset; } const void* pointer_into(const void* regs) { return static_cast(regs) + offset; } static uint64_t mask_for_nbytes(size_t nbytes) { assert(nbytes <= sizeof(comparison_mask)); return ((nbytes == sizeof(comparison_mask)) ? uint64_t(0) : (uint64_t(1) << nbytes * 8)) - 1; } }; typedef std::pair RegisterInit; template struct RegisterTable : std::array { RegisterTable(std::initializer_list list) { for (auto& ri : list) { (*this)[ri.first] = ri.second; } } }; template struct RegisterInfo; template <> struct RegisterInfo { static bool ignore_undefined_register(GdbRegister regno) { return regno == DREG_FOSEG || regno == DREG_MXCSR; } static const size_t num_registers = DREG_NUM_LINUX_I386; typedef RegisterTable Table; static Table registers; static RegisterValue non_gdb_registers[0]; }; template <> struct RegisterInfo { static bool ignore_undefined_register(GdbRegister regno) { return regno == DREG_64_FOSEG || regno == DREG_64_MXCSR; } static const size_t num_registers = DREG_NUM_LINUX_X86_64; typedef RegisterTable Table; static Table registers; static RegisterValue non_gdb_registers[2]; }; #define RV_ARCH(gdb_suffix, name, arch, extra_ctor_args) \ RegisterInit(DREG_##gdb_suffix, \ RegisterValue(#name, offsetof(arch::user_regs_struct, name), \ sizeof(((arch::user_regs_struct*)0)->name) \ extra_ctor_args)) #define RV_X86(gdb_suffix, name) \ RV_ARCH(gdb_suffix, name, rr::X86Arch, /* empty */) #define RV_X64(gdb_suffix, name) \ RV_ARCH(gdb_suffix, name, rr::X64Arch, /* empty */) #define COMMA , #define RV_X86_WITH_MASK(gdb_suffix, name, comparison_mask) \ RV_ARCH(gdb_suffix, name, rr::X86Arch, COMMA comparison_mask) #define RV_X64_WITH_MASK(gdb_suffix, name, comparison_mask) \ RV_ARCH(gdb_suffix, name, rr::X64Arch, COMMA comparison_mask) const uint64_t deterministic_eflags_mask = ~uint32_t( /* The following are eflags that have been observed to be non-deterministic in practice. We need to mask them off when comparing registers to prevent replay from diverging. */ /* The linux kernel has been observed to report this as zero in some states during system calls. It always seems to be 1 during user-space execution so we should be able to ignore it. */ X86_RESERVED_FLAG | /* This is usually set but we have observed cases where it's clear. It * shouldn't be modifiable by user space so we don't know why it would * change. */ X86_IF_FLAG | /* According to http://www.logix.cz/michal/doc/i386/chp04-01.htm: The RF flag temporarily disables debug exceptions so that an instruction can be restarted after a debug exception without immediately causing another debug exception. Refer to Chapter 12 for details. Chapter 12 isn't particularly clear on the point, but the flag appears to be set by |int3| exceptions. 
This divergence has been observed when continuing a tracee to an execution target by setting an |int3| breakpoint, which isn't used during recording. No single-stepping was used during the recording either. */ X86_RF_FLAG | /* It is no longer known why this bit is ignored. */ X86_ID_FLAG); RegisterInfo::Table RegisterInfo::registers = { RV_X86(EAX, eax), RV_X86(ECX, ecx), RV_X86(EDX, edx), RV_X86(EBX, ebx), RV_X86(ESP, esp), RV_X86(EBP, ebp), RV_X86(ESI, esi), RV_X86(EDI, edi), RV_X86(EIP, eip), RV_X86_WITH_MASK(EFLAGS, eflags, deterministic_eflags_mask), RV_X86_WITH_MASK(CS, xcs, 0), RV_X86_WITH_MASK(SS, xss, 0), RV_X86_WITH_MASK(DS, xds, 0), RV_X86_WITH_MASK(ES, xes, 0), RV_X86(FS, xfs), RV_X86(GS, xgs), // The comparison for this is handled specially elsewhere. RV_X86_WITH_MASK(ORIG_EAX, orig_eax, 0), }; RegisterValue RegisterInfo::non_gdb_registers[0] = {}; RegisterInfo::Table RegisterInfo::registers = { RV_X64(RAX, rax), RV_X64(RCX, rcx), RV_X64(RDX, rdx), RV_X64(RBX, rbx), RV_X64_WITH_MASK(RSP, rsp, 0), RV_X64(RBP, rbp), RV_X64(RSI, rsi), RV_X64(RDI, rdi), RV_X64(R8, r8), RV_X64(R9, r9), RV_X64(R10, r10), RV_X64(R11, r11), RV_X64(R12, r12), RV_X64(R13, r13), RV_X64(R14, r14), RV_X64(R15, r15), RV_X64(RIP, rip), RV_X64_WITH_MASK(64_EFLAGS, eflags, deterministic_eflags_mask), RV_X64_WITH_MASK(64_CS, cs, 0), RV_X64_WITH_MASK(64_SS, ss, 0), RV_X64_WITH_MASK(64_DS, ds, 0), RV_X64_WITH_MASK(64_ES, es, 0), RV_X64(64_FS, fs), RV_X64(64_GS, gs), // The comparison for this is handled specially // elsewhere. RV_X64_WITH_MASK(ORIG_RAX, orig_rax, 0), }; RegisterValue RegisterInfo::non_gdb_registers[2] = { { "fs_base", offsetof(rr::X64Arch::user_regs_struct, fs_base), 8, RegisterValue::mask_for_nbytes(8) }, { "gs_base", offsetof(rr::X64Arch::user_regs_struct, gs_base), 8, RegisterValue::mask_for_nbytes(8) } }; #undef RV_X64 #undef RV_X86 #undef RV_ARCH // 32-bit format, 64-bit format for all of these. // format_index in RegisterPrinting depends on the ordering here. 
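// (RegisterPrinting<4>::format_index selects the 32-bit format string and
// RegisterPrinting<8>::format_index the 64-bit one; see just below.)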
static const char* hex_format[] = { "%" PRIx32, "%" PRIx64 }; static const char* hex_format_leading_0x[] = { "0x%" PRIx32, "0x%" PRIx64 }; // static const char* decimal_format[] = { "%" PRId32, "%" PRId64 }; template struct RegisterPrinting; template <> struct RegisterPrinting<4> { typedef uint32_t type; static const size_t format_index = 0; }; template <> struct RegisterPrinting<8> { typedef uint64_t type; static const size_t format_index = 1; }; template void print_single_register(FILE* f, const char* name, const void* register_ptr, const char* formats[]) { typename RegisterPrinting::type val; memcpy(&val, register_ptr, nbytes); if (name) { fprintf(f, "%s:", name); } else { fprintf(f, " "); } fprintf(f, formats[RegisterPrinting::format_index], val); } template void Registers::print_register_file_arch(FILE* f, const char* formats[]) const { fprintf(f, "Printing register file:\n"); const void* user_regs = &u; for (auto& rv : RegisterInfo::registers) { if (rv.nbytes == 0) { continue; } switch (rv.nbytes) { case 8: print_single_register<8>(f, rv.name, rv.pointer_into(user_regs), formats); break; case 4: print_single_register<4>(f, rv.name, rv.pointer_into(user_regs), formats); break; default: assert(0 && "bad register size"); } fprintf(f, "\n"); } fprintf(f, "\n"); } void Registers::print_register_file(FILE* f) const { RR_ARCH_FUNCTION(print_register_file_arch, arch(), f, hex_format); } template void Registers::print_register_file_for_trace_arch( FILE* f, TraceStyle style, const char* formats[]) const { const void* user_regs = &u; for (auto& rv : RegisterInfo::registers) { if (rv.nbytes == 0) { continue; } fprintf(f, " "); const char* name = (style == Annotated ? rv.name : nullptr); switch (rv.nbytes) { case 8: print_single_register<8>(f, name, rv.pointer_into(user_regs), formats); break; case 4: print_single_register<4>(f, name, rv.pointer_into(user_regs), formats); break; default: assert(0 && "bad register size"); } } for (auto& rv : RegisterInfo::non_gdb_registers) { fprintf(f, " "); const char* name = (style == Annotated ? rv.name : nullptr); switch (rv.nbytes) { case 8: print_single_register<8>(f, name, rv.pointer_into(user_regs), formats); break; case 4: print_single_register<4>(f, name, rv.pointer_into(user_regs), formats); break; default: assert(0 && "bad register size"); } } } void Registers::print_register_file_compact(FILE* f) const { RR_ARCH_FUNCTION(print_register_file_for_trace_arch, arch(), f, Annotated, hex_format); } void Registers::print_register_file_for_trace(FILE* f) const { RR_ARCH_FUNCTION(print_register_file_for_trace_arch, arch(), f, Annotated, hex_format_leading_0x); } void Registers::print_register_file_for_trace_raw(FILE* f) const { fprintf(f, " %d %d %d %d %d %d %d" " %d %d %d %d", u.x86regs.eax, u.x86regs.ebx, u.x86regs.ecx, u.x86regs.edx, u.x86regs.esi, u.x86regs.edi, u.x86regs.ebp, u.x86regs.orig_eax, u.x86regs.esp, u.x86regs.eip, u.x86regs.eflags); } static void maybe_print_reg_mismatch(MismatchBehavior mismatch_behavior, const char* regname, const char* label1, uint64_t val1, const char* label2, uint64_t val2) { if (mismatch_behavior >= BAIL_ON_MISMATCH) { LOG(error) << regname << " " << HEX(val1) << " != " << HEX(val2) << " (" << label1 << " vs. " << label2 << ")"; } else if (mismatch_behavior >= LOG_MISMATCHES) { LOG(info) << regname << " " << HEX(val1) << " != " << HEX(val2) << " (" << label1 << " vs. 
" << label2 << ")"; } } template bool Registers::compare_registers_core(const char* name1, const Registers& reg1, const char* name2, const Registers& reg2, MismatchBehavior mismatch_behavior) { bool match = true; for (auto& rv : RegisterInfo::registers) { if (rv.nbytes == 0) { continue; } // Disregard registers that will trivially compare equal. if (rv.comparison_mask == 0) { continue; } // XXX correct but oddly displayed for big-endian processors. uint64_t val1 = 0, val2 = 0; memcpy(&val1, rv.pointer_into(®1.u), rv.nbytes); memcpy(&val2, rv.pointer_into(®2.u), rv.nbytes); if ((val1 ^ val2) & rv.comparison_mask) { maybe_print_reg_mismatch(mismatch_behavior, rv.name, name1, val1, name2, val2); match = false; } } return match; } // A handy macro for compare_registers_arch specializations. #define REGCMP(user_regs, _reg) \ do { \ if (reg1.user_regs._reg != reg2.user_regs._reg) { \ maybe_print_reg_mismatch(mismatch_behavior, #_reg, name1, \ reg1.user_regs._reg, name2, \ reg2.user_regs._reg); \ match = false; \ } \ } while (0) #define X86_REGCMP(_reg) REGCMP(u.x86regs, _reg) #define X64_REGCMP(_reg) REGCMP(u.x64regs, _reg) // A wrapper around compare_registers_core so registers requiring special // processing can be handled via template specialization. template /* static */ bool Registers::compare_registers_arch( const char* name1, const Registers& reg1, const char* name2, const Registers& reg2, MismatchBehavior mismatch_behavior) { // Default behavior. return compare_registers_core(name1, reg1, name2, reg2, mismatch_behavior); } template <> /* static */ bool Registers::compare_registers_arch( const char* name1, const Registers& reg1, const char* name2, const Registers& reg2, MismatchBehavior mismatch_behavior) { bool match = compare_registers_core(name1, reg1, name2, reg2, mismatch_behavior); /* Negative orig_eax values, observed at SCHED events and signals, seemingly can vary between recording and replay on some kernels (e.g. Linux ubuntu 3.13.0-24-generic). They probably reflect signals sent or something like that. */ if (reg1.u.x86regs.orig_eax >= 0 || reg2.u.x86regs.orig_eax >= 0) { X86_REGCMP(orig_eax); } return match; } template <> /* static */ bool Registers::compare_registers_arch( const char* name1, const Registers& reg1, const char* name2, const Registers& reg2, MismatchBehavior mismatch_behavior) { bool match = compare_registers_core(name1, reg1, name2, reg2, mismatch_behavior); // XXX haven't actually observed this to be true on x86-64 yet, but // assuming that it follows the x86 behavior. if ((intptr_t)reg1.u.x64regs.orig_rax >= 0 || (intptr_t)reg2.u.x64regs.orig_rax >= 0) { X64_REGCMP(orig_rax); } // Check the _upper bits of various registers we defined more conveniently // for our gdb support. 
X64_REGCMP(cs_upper); X64_REGCMP(ds_upper); X64_REGCMP(es_upper); X64_REGCMP(fs_upper); X64_REGCMP(gs_upper); X64_REGCMP(ss_upper); X64_REGCMP(eflags_upper); return match; } /*static*/ bool Registers::compare_register_files_internal( const char* name1, const Registers& reg1, const char* name2, const Registers& reg2, MismatchBehavior mismatch_behavior) { assert(reg1.arch() == reg2.arch()); RR_ARCH_FUNCTION(compare_registers_arch, reg1.arch(), name1, reg1, name2, reg2, mismatch_behavior); } /*static*/ bool Registers::compare_register_files( Task* t, const char* name1, const Registers& reg1, const char* name2, const Registers& reg2, MismatchBehavior mismatch_behavior) { bool bail_error = mismatch_behavior >= BAIL_ON_MISMATCH; bool match = compare_register_files_internal(name1, reg1, name2, reg2, mismatch_behavior); ASSERT(t, !bail_error || match) << "Fatal register mismatch (ticks/rec:" << t->tick_count() << "/" << t->current_trace_frame().ticks() << ")"; if (match && mismatch_behavior == LOG_MISMATCHES) { LOG(info) << "(register files are the same for " << name1 << " and " << name2 << ")"; } return match; } template size_t Registers::read_register_arch(uint8_t* buf, GdbRegister regno, bool* defined) const { assert(regno < total_registers()); // Make sure these two definitions don't get out of sync. assert(array_length(RegisterInfo::registers) == total_registers()); RegisterValue& rv = RegisterInfo::registers[regno]; if (rv.nbytes == 0) { *defined = false; } else { *defined = true; memcpy(buf, rv.pointer_into(&u), rv.nbytes); } return rv.nbytes; } size_t Registers::read_register(uint8_t* buf, GdbRegister regno, bool* defined) const { RR_ARCH_FUNCTION(read_register_arch, arch(), buf, regno, defined); } template size_t Registers::read_register_by_user_offset_arch(uint8_t* buf, uintptr_t offset, bool* defined) const { for (size_t regno = 0; regno < RegisterInfo::num_registers; ++regno) { RegisterValue& rv = RegisterInfo::registers[regno]; if (rv.offset == offset) { return read_register_arch(buf, GdbRegister(regno), defined); } } *defined = false; return 0; } size_t Registers::read_register_by_user_offset(uint8_t* buf, uintptr_t offset, bool* defined) const { RR_ARCH_FUNCTION(read_register_by_user_offset_arch, arch(), buf, offset, defined); } template void Registers::write_register_arch(GdbRegister regno, const uint8_t* value, size_t value_size) { RegisterValue& rv = RegisterInfo::registers[regno]; if (rv.nbytes == 0) { // TODO: can we get away with not writing these? 
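// (Currently that set is just DREG_FOSEG/DREG_MXCSR and their 64-bit
// equivalents: registers gdb may write but that we don't store in
// user_regs_struct.)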
    if (RegisterInfo<Arch>::ignore_undefined_register(regno)) {
      return;
    }
    LOG(warn) << "Unhandled register name " << regno;
  } else {
    assert(value_size == rv.nbytes);
    memcpy(rv.pointer_into(&u), value, value_size);
  }
}

void Registers::write_register(GdbRegister regno, const uint8_t* value,
                               size_t value_size) {
  RR_ARCH_FUNCTION(write_register_arch, arch(), regno, value, value_size);
}

template <typename Arch> size_t Registers::total_registers_arch() const {
  return RegisterInfo<Arch>::num_registers;
}

size_t Registers::total_registers() const {
  RR_ARCH_FUNCTION(total_registers_arch, arch());
}

typedef void (*NarrowConversion)(int32_t& r32, uint64_t& r64);
typedef void (*SameConversion)(int32_t& r32, uint32_t& r64);

template <NarrowConversion narrow, SameConversion same>
void convert_x86(X86Arch::user_regs_struct& x86,
                 X64Arch::user_regs_struct& x64) {
  narrow(x86.eax, x64.rax);
  narrow(x86.ebx, x64.rbx);
  narrow(x86.ecx, x64.rcx);
  narrow(x86.edx, x64.rdx);
  narrow(x86.esi, x64.rsi);
  narrow(x86.edi, x64.rdi);
  narrow(x86.esp, x64.rsp);
  narrow(x86.ebp, x64.rbp);
  narrow(x86.eip, x64.rip);
  narrow(x86.orig_eax, x64.orig_rax);
  same(x86.eflags, x64.eflags);
  same(x86.xcs, x64.cs);
  same(x86.xds, x64.ds);
  same(x86.xes, x64.es);
  same(x86.xfs, x64.fs);
  same(x86.xgs, x64.gs);
  same(x86.xss, x64.ss);
}

void to_x86_narrow(int32_t& r32, uint64_t& r64) { r32 = r64; }
void to_x86_same(int32_t& r32, uint32_t& r64) { r32 = r64; }
void from_x86_narrow(int32_t& r32, uint64_t& r64) {
  // We must zero-extend 32-bit register values to 64-bit values when we
  // do a PTRACE_SETREGS. Most of the time the upper 32 bits are irrelevant
  // for a 32-bit tracee, but when setting up a signal handler frame, the
  // kernel does some arithmetic on the 64-bit SP value and validates that
  // the result points to writeable memory. This validation fails if SP has
  // been sign-extended to point outside the 32-bit address space.
  r64 = (uint32_t)r32;
}
void from_x86_same(int32_t& r32, uint32_t& r64) { r64 = r32; }

void Registers::set_from_ptrace(const struct user_regs_struct& ptrace_regs) {
  if (arch() == NativeArch::arch()) {
    memcpy(&u, &ptrace_regs, sizeof(ptrace_regs));
    return;
  }

  assert(arch() == x86 && NativeArch::arch() == x86_64);
  convert_x86<to_x86_narrow, to_x86_same>(
      u.x86regs, *reinterpret_cast<X64Arch::user_regs_struct*>(
                     const_cast<struct user_regs_struct*>(&ptrace_regs)));
}

/**
 * Get a user_regs_struct from these Registers. If the tracee architecture
 * is not rr's native architecture, then it must be a 32-bit tracee with a
 * 64-bit rr. In that case the user_regs_struct is 64-bit and we copy
 * the 32-bit register values from u.x86regs into it.
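 * (Sketch of that copy: convert_x86<from_x86_narrow, from_x86_same> fills
 * each 64-bit slot from the matching u.x86regs field, zero-extending
 * values such as esp so the kernel never sees a sign-extended pointer.)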
*/ struct user_regs_struct Registers::get_ptrace() const { union { struct user_regs_struct linux_api; struct X64Arch::user_regs_struct x64arch_api; } result; if (arch() == NativeArch::arch()) { memcpy(&result, &u, sizeof(result)); return result.linux_api; } assert(arch() == x86 && NativeArch::arch() == x86_64); memset(&result, 0, sizeof(result)); convert_x86( const_cast(this)->u.x86regs, result.x64arch_api); return result.linux_api; } vector Registers::get_ptrace_for_arch(SupportedArch arch) const { Registers tmp_regs(arch); tmp_regs.set_from_ptrace(get_ptrace()); vector result; switch (arch) { case x86: result.resize(sizeof(u.x86regs)); memcpy(result.data(), &u.x86regs, result.size()); break; case x86_64: result.resize(sizeof(u.x64regs)); memcpy(result.data(), &u.x64regs, result.size()); break; default: assert(0 && "Unknown arch"); break; } return result; } uintptr_t Registers::flags() const { switch (arch()) { case x86: return u.x86regs.eflags; case x86_64: return u.x64regs.eflags | (uint64_t(u.x64regs.eflags_upper) << 32); default: assert(0 && "Unknown arch"); return false; } } void Registers::set_flags(uintptr_t value) { switch (arch()) { case x86: u.x86regs.eflags = value; break; case x86_64: u.x64regs.eflags = value; u.x64regs.eflags_upper = uint64_t(value) >> 32; break; default: assert(0 && "Unknown arch"); break; } } ostream& operator<<(ostream& stream, const Registers& r) { stream << "{ args:(" << HEX(r.arg1()) << "," << HEX(r.arg2()) << "," << HEX(r.arg3()) << "," << HEX(r.arg4()) << "," << HEX(r.arg5()) << "," << r.arg6() << ") orig_syscall:" << r.original_syscallno() << " }"; return stream; } rr-4.1.0/src/Registers.h000066400000000000000000000352531265436462100150670ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_REGISTERS_H_ #define RR_REGISTERS_H_ #include #include #include #include #include #include #include #include "GdbRegister.h" #include "kernel_abi.h" #include "kernel_supplement.h" #include "remote_ptr.h" #include "remote_code_ptr.h" class Task; enum MismatchBehavior { EXPECT_MISMATCHES = 0, LOG_MISMATCHES, BAIL_ON_MISMATCH }; const uintptr_t X86_RESERVED_FLAG = 1 << 1; const uintptr_t X86_TF_FLAG = 1 << 8; const uintptr_t X86_IF_FLAG = 1 << 9; const uintptr_t X86_DF_FLAG = 1 << 10; const uintptr_t X86_RF_FLAG = 1 << 16; const uintptr_t X86_ID_FLAG = 1 << 21; /** * A Registers object contains values for all general-purpose registers. * These must include all registers used to pass syscall parameters and return * syscall results. * * When reading register values, be sure to cast the result to the correct * type according to the kernel docs. E.g. int values should be cast * to int explicitly (or implicitly, by assigning to an int-typed variable), * size_t should be cast to size_t, etc. If the type is signed, call the * _signed getter. This ensures that when building rr 64-bit we will use the * right number of register bits whether the tracee is 32-bit or 64-bit, and * get sign-extension right. * * We have different register sets for different architectures. To ensure a * trace can be dumped/processed by an rr build on any platform, we allow * Registers to contain registers for any architecture. So we store them * in a union of Arch::user_regs_structs for each known Arch. 
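 *
 * A hedged usage sketch of the casting rules above (variable names
 * invented for illustration):
 *
 *   int fd = (int)regs.syscall_result_signed();  // signed int result
 *   size_t len = (size_t)regs.arg3();            // unsigned size parameter
 *
 * so a 32-bit tracee's -1 return survives as -1 rather than 0xffffffff.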
*/ class Registers { public: enum { MAX_SIZE = 16 }; Registers(SupportedArch a = SupportedArch(-1)) : arch_(a) { memset(&u, 0, sizeof(u)); } SupportedArch arch() const { return arch_; } void set_arch(SupportedArch a) { arch_ = a; } /** * Copy a user_regs_struct into these Registers. If the tracee architecture * is not rr's native architecture, then it must be a 32-bit tracee with a * 64-bit rr. In that case the user_regs_struct is 64-bit and we extract * the 32-bit register values from it into u.x86regs. * It's invalid to call this when the Registers' arch is 64-bit and the * rr build is 32-bit, or when the Registers' arch is completely different * to the rr build (e.g. ARM vs x86). */ void set_from_ptrace(const struct user_regs_struct& ptrace_regs); /** * Get a user_regs_struct from these Registers. If the tracee architecture * is not rr's native architecture, then it must be a 32-bit tracee with a * 64-bit rr. In that case the user_regs_struct is 64-bit and we copy * the 32-bit register values from u.x86regs into it. * It's invalid to call this when the Registers' arch is 64-bit and the * rr build is 32-bit, or when the Registers' arch is completely different * to the rr build (e.g. ARM vs x86). */ struct user_regs_struct get_ptrace() const; /** * Get a user_regs_struct for a particular Arch from these Registers. * It's invalid to call this when 'arch' is 64-bit and the * rr build is 32-bit, or when the Registers' arch is completely different * to the rr build (e.g. ARM vs x86). */ std::vector get_ptrace_for_arch(SupportedArch arch) const; #define RR_GET_REG(x86case, x64case) \ (arch() == x86 ? (uint32_t)u.x86regs.x86case \ : arch() == x86_64 \ ? u.x64regs.x64case \ : (assert(0 && "unknown architecture"), uintptr_t(-1))) #define RR_GET_REG_SIGNED(x86case, x64case) \ (arch() == x86 ? u.x86regs.x86case \ : arch() == x86_64 \ ? u.x64regs.x64case \ : (assert(0 && "unknown architecture"), uintptr_t(-1))) #define RR_SET_REG(x86case, x64case, value) \ switch (arch()) { \ case x86: \ u.x86regs.x86case = (value); \ break; \ case x86_64: \ u.x64regs.x64case = (value); \ break; \ default: \ assert(0 && "unknown architecture"); \ } remote_code_ptr ip() const { return RR_GET_REG(eip, rip); } void set_ip(remote_code_ptr addr) { RR_SET_REG(eip, rip, addr.register_value()); } remote_ptr sp() const { return RR_GET_REG(esp, rsp); } void set_sp(remote_ptr addr) { RR_SET_REG(esp, rsp, addr.as_int()); } // Access the registers holding system-call numbers, results, and // parameters. intptr_t syscallno() const { return RR_GET_REG(eax, rax); } void set_syscallno(intptr_t syscallno) { RR_SET_REG(eax, rax, syscallno); } uintptr_t syscall_result() const { return RR_GET_REG(eax, rax); } intptr_t syscall_result_signed() const { return RR_GET_REG_SIGNED(eax, rax); } void set_syscall_result(uintptr_t syscall_result) { RR_SET_REG(eax, rax, syscall_result); } template void set_syscall_result(remote_ptr syscall_result) { RR_SET_REG(eax, rax, syscall_result.as_int()); } /** * Returns true if syscall_result() indicates failure. */ bool syscall_failed() const { auto result = syscall_result_signed(); return -ERANGE <= result && result < 0; } /** * Returns true if syscall_result() indicates a syscall restart. 
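 * (These -ERESTART* codes are normally visible only to a ptracer such as
 * rr: the kernel rewrites them into -EINTR or a transparent restart
 * before an untraced process could ever observe them.)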
*/ bool syscall_may_restart() const { switch (-syscall_result_signed()) { case ERESTART_RESTARTBLOCK: case ERESTARTNOINTR: case ERESTARTNOHAND: case ERESTARTSYS: return true; default: return false; } } /** * This pseudo-register holds the system-call number when we get ptrace * enter-system-call and exit-system-call events. Setting it changes * the system-call executed when resuming after an enter-system-call * event. */ intptr_t original_syscallno() const { return RR_GET_REG_SIGNED(orig_eax, orig_rax); } void set_original_syscallno(intptr_t syscallno) { RR_SET_REG(orig_eax, orig_rax, syscallno); } uintptr_t arg1() const { return RR_GET_REG(ebx, rdi); } intptr_t arg1_signed() const { return RR_GET_REG_SIGNED(ebx, rdi); } void set_arg1(uintptr_t value) { RR_SET_REG(ebx, rdi, value); } template void set_arg1(remote_ptr value) { RR_SET_REG(ebx, rdi, value.as_int()); } uintptr_t arg2() const { return RR_GET_REG(ecx, rsi); } intptr_t arg2_signed() const { return RR_GET_REG_SIGNED(ecx, rsi); } void set_arg2(uintptr_t value) { RR_SET_REG(ecx, rsi, value); } template void set_arg2(remote_ptr value) { RR_SET_REG(ecx, rsi, value.as_int()); } uintptr_t arg3() const { return RR_GET_REG(edx, rdx); } intptr_t arg3_signed() const { return RR_GET_REG_SIGNED(edx, rdx); } void set_arg3(uintptr_t value) { RR_SET_REG(edx, rdx, value); } template void set_arg3(remote_ptr value) { RR_SET_REG(edx, rdx, value.as_int()); } uintptr_t arg4() const { return RR_GET_REG(esi, r10); } intptr_t arg4_signed() const { return RR_GET_REG_SIGNED(esi, r10); } void set_arg4(uintptr_t value) { RR_SET_REG(esi, r10, value); } template void set_arg4(remote_ptr value) { RR_SET_REG(esi, r10, value.as_int()); } uintptr_t arg5() const { return RR_GET_REG(edi, r8); } intptr_t arg5_signed() const { return RR_GET_REG_SIGNED(edi, r8); } void set_arg5(uintptr_t value) { RR_SET_REG(edi, r8, value); } template void set_arg5(remote_ptr value) { RR_SET_REG(edi, r8, value.as_int()); } uintptr_t arg6() const { return RR_GET_REG(ebp, r9); } intptr_t arg6_signed() const { return RR_GET_REG_SIGNED(ebp, r9); } void set_arg6(uintptr_t value) { RR_SET_REG(ebp, r9, value); } template void set_arg6(remote_ptr value) { RR_SET_REG(ebp, r9, value.as_int()); } uintptr_t arg(int index) const { switch (index) { case 1: return arg1(); case 2: return arg2(); case 3: return arg3(); case 4: return arg4(); case 5: return arg5(); case 6: return arg6(); default: assert(0 && "Argument index out of range"); return 0; } } /** * Set the register containing syscall argument |Index| to * |value|. */ template void set_arg(T value) { set_arg(Index, uintptr_t(value)); } template void set_arg(remote_ptr value) { set_arg(Index, value.as_int()); } void set_arg(int index, uintptr_t value) { switch (index) { case 1: return set_arg1(value); case 2: return set_arg2(value); case 3: return set_arg3(value); case 4: return set_arg4(value); case 5: return set_arg5(value); case 6: return set_arg6(value); default: assert(0 && "Argument index out of range"); } } // Some X86-specific stuff follows. Use of these accessors should be guarded // by an architecture test. /** * Set the output registers of the |rdtsc| instruction. 
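 * E.g. an illustrative value 0x0000001234567890 comes out as
 * edx:eax = 0x00000012:0x34567890 (the high word goes to edx/rdx, the
 * low word to eax/rax, per the mask and shift below).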
 */
  void set_rdtsc_output(uint64_t value) {
    RR_SET_REG(eax, rax, value & 0xffffffff);
    RR_SET_REG(edx, rdx, value >> 32);
  }

  uintptr_t r11() const {
    assert(arch() == x86_64);
    return u.x64regs.r11;
  }
  void set_r11(uintptr_t value) {
    assert(arch() == x86_64);
    u.x64regs.r11 = value;
  }

  uintptr_t di() const { return RR_GET_REG(edi, rdi); }
  void set_di(uintptr_t value) { RR_SET_REG(edi, rdi, value); }

  uintptr_t si() const { return RR_GET_REG(esi, rsi); }
  void set_si(uintptr_t value) { RR_SET_REG(esi, rsi, value); }

  uintptr_t cx() const { return RR_GET_REG(ecx, rcx); }
  void set_cx(uintptr_t value) { RR_SET_REG(ecx, rcx, value); }

  uintptr_t bp() const { return RR_GET_REG(ebp, rbp); }

  uintptr_t flags() const;
  void set_flags(uintptr_t value);
  bool singlestep_flag() { return flags() & X86_TF_FLAG; }
  void clear_singlestep_flag() { set_flags(flags() & ~X86_TF_FLAG); }
  bool df_flag() const { return flags() & X86_DF_FLAG; }

  // End of X86-specific stuff

  void print_register_file(FILE* f) const;
  void print_register_file_compact(FILE* f) const;
  void print_register_file_for_trace(FILE* f) const;
  void print_register_file_for_trace_raw(FILE* f) const;

  /**
   * Return true if |reg1| matches |reg2|. Passing EXPECT_MISMATCHES
   * indicates that the caller is using this as a general register
   * compare and nothing special should be done if the register files
   * mismatch. Passing LOG_MISMATCHES will log the registers that don't
   * match. Passing BAIL_ON_MISMATCH will additionally abort on
   * mismatch.
   */
  static bool compare_register_files(Task* t, const char* name1,
                                     const Registers& reg1, const char* name2,
                                     const Registers& reg2,
                                     MismatchBehavior mismatch_behavior);

  bool matches(const Registers& other) const {
    return compare_register_files(nullptr, nullptr, *this, nullptr, other,
                                  EXPECT_MISMATCHES);
  }

  /**
   * Return the total number of registers for this target.
   */
  size_t total_registers() const;

  // TODO: refactor me to use the GdbRegisterValue helper from
  // GdbConnection.h.

  /**
   * Write the value for register |regno| into |buf|, which should
   * be large enough to hold any register supported by the target.
   * Return the size of the register in bytes and set |defined| to
   * indicate whether a useful value has been written to |buf|.
   */
  size_t read_register(uint8_t* buf, GdbRegister regno, bool* defined) const;

  /**
   * Write the value for register |offset| into |buf|, which should
   * be large enough to hold any register supported by the target.
   * Return the size of the register in bytes and set |defined| to
   * indicate whether a useful value has been written to |buf|.
   * |offset| is the offset of the register within a user_regs_struct.
   */
  size_t read_register_by_user_offset(uint8_t* buf, uintptr_t offset,
                                      bool* defined) const;

  /**
   * Update the register named |reg_name| to |value| with
   * |value_size| number of bytes.
*/ void write_register(GdbRegister reg_name, const uint8_t* value, size_t value_size); private: template void print_register_file_arch(FILE* f, const char* formats[]) const; enum TraceStyle { Annotated, Raw, }; template void print_register_file_for_trace_arch(FILE* f, TraceStyle style, const char* formats[]) const; template static bool compare_registers_core(const char* name1, const Registers& reg1, const char* name2, const Registers& reg2, MismatchBehavior mismatch_behavior); template static bool compare_registers_arch(const char* name1, const Registers& reg1, const char* name2, const Registers& reg2, MismatchBehavior mismatch_behavior); static bool compare_register_files_internal( const char* name1, const Registers& reg1, const char* name2, const Registers& reg2, MismatchBehavior mismatch_behavior); template size_t read_register_arch(uint8_t* buf, GdbRegister regno, bool* defined) const; template size_t read_register_by_user_offset_arch(uint8_t* buf, uintptr_t offset, bool* defined) const; template void write_register_arch(GdbRegister regno, const uint8_t* value, size_t value_size); template size_t total_registers_arch() const; union AllRegisters { rr::X86Arch::user_regs_struct x86regs; rr::X64Arch::user_regs_struct x64regs; } u; SupportedArch arch_; }; std::ostream& operator<<(std::ostream& stream, const Registers& r); #endif /* RR_REGISTERS_H_ */ rr-4.1.0/src/ReplayCommand.cc000066400000000000000000000357341265436462100160150ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "ReplayCommand" #include #include #include #include #include "Command.h" #include "Flags.h" #include "GdbServer.h" #include "kernel_metadata.h" #include "log.h" #include "main.h" #include "ReplaySession.h" #include "ScopedFd.h" using namespace std; static int DUMP_STATS_PERIOD = 0; class ReplayCommand : public Command { public: virtual int run(std::vector& args); protected: ReplayCommand(const char* name, const char* help) : Command(name, help) {} static ReplayCommand singleton; }; ReplayCommand ReplayCommand::singleton( "replay", " rr replay [OPTION]... []\n" " -a, --autopilot replay without debugger server\n" " -f, --onfork= start a debug server when has been\n" " fork()d, AND the target event has been\n" " reached.\n" " -g, --goto= start a debug server on reaching " "\n" " in the trace. See -M in the general " "options.\n" " -p, --onprocess=|\n" " start a debug server when or " "\n" " has been exec()d, AND the target event has " "been\n" " reached.\n" " -d, --debugger= use as the gdb command\n" " -q, --no-redirect-output don't replay writes to stdout/stderr\n" " -s, --dbgport= only start a debug server on ;\n" " don't automatically launch the debugger\n" " client too.\n" " -t, --trace= singlestep instructions and dump register\n" " states when replaying towards or\n" " later\n" " -x, --gdb-x= execute gdb commands from \n"); struct ReplayFlags { // Start a debug server for the task scheduled at the first // event at which reached this event AND target_process has // been "created". TraceFrame::Time goto_event; TraceFrame::Time singlestep_to_event; pid_t target_process; string target_command; // We let users specify which process should be "created" before // starting a debug session for it. Problem is, "process" in this // context is ambiguous. It could mean the "thread group", which is // created at fork(). Or it could mean the "address space", which is // created at exec() (after the fork). 
// // We force choosers to specify which they mean. enum { CREATED_NONE, CREATED_EXEC, CREATED_FORK } process_created_how; // Only open a debug socket, don't launch the debugger too. bool dont_launch_debugger; // IP port to listen on for debug connections. int dbg_port; // Pass this file name to debugger with -x string gdb_command_file_path; // Specify a custom gdb binary with -d string gdb_binary_file_path; /* When true, echo tracee stdout/stderr writes to console. */ bool redirect; ReplayFlags() : goto_event(0), singlestep_to_event(0), target_process(0), process_created_how(CREATED_NONE), dont_launch_debugger(false), dbg_port(-1), gdb_binary_file_path("gdb"), redirect(true) {} }; static bool parse_replay_arg(std::vector& args, ReplayFlags& flags) { if (parse_global_option(args)) { return true; } static const OptionSpec options[] = { { 'a', "autopilot", NO_PARAMETER }, { 'd', "debugger", HAS_PARAMETER }, { 's', "dbgport", HAS_PARAMETER }, { 'g', "goto", HAS_PARAMETER }, { 't', "trace", HAS_PARAMETER }, { 'q', "no-redirect-output", NO_PARAMETER }, { 'f', "onfork", HAS_PARAMETER }, { 'p', "onprocess", HAS_PARAMETER }, { 'x', "gdb-x", HAS_PARAMETER } }; ParsedOption opt; if (!Command::parse_option(args, options, &opt)) { return false; } switch (opt.short_name) { case 'a': flags.goto_event = numeric_limits::max(); flags.dont_launch_debugger = true; break; case 'd': flags.gdb_binary_file_path = opt.value; break; case 'f': if (!opt.verify_valid_int(1, INT32_MAX)) { return false; } flags.target_process = opt.int_value; flags.process_created_how = ReplayFlags::CREATED_FORK; break; case 'g': if (!opt.verify_valid_int(1, UINT32_MAX)) { return false; } flags.goto_event = opt.int_value; break; case 'p': if (opt.int_value > 0) { if (!opt.verify_valid_int(1, INT32_MAX)) { return false; } flags.target_process = opt.int_value; } else { flags.target_command = opt.value; } flags.process_created_how = ReplayFlags::CREATED_EXEC; break; case 'q': flags.redirect = false; break; case 's': if (!opt.verify_valid_int(1, INT32_MAX)) { return false; } flags.dbg_port = opt.int_value; flags.dont_launch_debugger = true; break; case 't': if (!opt.verify_valid_int(1, INT32_MAX)) { return false; } flags.singlestep_to_event = opt.int_value; break; case 'x': flags.gdb_command_file_path = opt.value; break; default: assert(0 && "Unknown option"); } return true; } static int find_pid_for_command(const string& trace_dir, const string& command) { TraceReader trace(trace_dir); while (trace.good()) { auto e = trace.read_task_event(); if (e.type() != TraceTaskEvent::EXEC) { continue; } if (e.cmd_line().empty()) { continue; } auto& cmd = e.cmd_line()[0]; if (cmd == command || (cmd.size() > command.size() && cmd.substr(cmd.size() - command.size() - 1) == ('/' + command))) { return e.tid(); } } return -1; } static bool pid_exists(const string& trace_dir, pid_t pid) { TraceReader trace(trace_dir); while (trace.good()) { auto e = trace.read_task_event(); if (e.tid() == pid) { return true; } } return false; } static bool pid_execs(const string& trace_dir, pid_t pid) { TraceReader trace(trace_dir); while (trace.good()) { auto e = trace.read_task_event(); if (e.tid() == pid && e.type() == TraceTaskEvent::EXEC) { return true; } } return false; } // The parent process waits until the server, |waiting_for_child|, creates a // debug socket. Then the parent exec()s the debugger over itself. While it's // waiting for the child, this is the child's pid. // This needs to be global because it's used by a signal handler. 
static pid_t waiting_for_child; static ReplaySession::Flags session_flags(ReplayFlags flags) { ReplaySession::Flags result; result.redirect_stdio = flags.redirect; return result; } static uint64_t to_microseconds(const struct timeval& tv) { return (uint64_t)tv.tv_sec * 1000000 + tv.tv_usec; } static void serve_replay_no_debugger(const string& trace_dir, const ReplayFlags& flags) { ReplaySession::shr_ptr replay_session = ReplaySession::create(trace_dir); replay_session->set_flags(session_flags(flags)); uint32_t step_count = 0; struct timeval last_dump_time; Session::Statistics last_stats; gettimeofday(&last_dump_time, NULL); while (true) { RunCommand cmd = RUN_CONTINUE; if (flags.singlestep_to_event > 0 && replay_session->trace_reader().time() >= flags.singlestep_to_event) { cmd = RUN_SINGLESTEP; fputs("Stepping from:", stderr); Task* t = replay_session->current_task(); t->regs().print_register_file_compact(stderr); fprintf(stderr, " ticks:%" PRId64 "\n", t->tick_count()); } TraceFrame::Time before_time = replay_session->trace_reader().time(); auto result = replay_session->replay_step(cmd); TraceFrame::Time after_time = replay_session->trace_reader().time(); assert(after_time >= before_time && after_time <= before_time + 1); ++step_count; if (DUMP_STATS_PERIOD > 0 && step_count % DUMP_STATS_PERIOD == 0) { struct timeval now; gettimeofday(&now, NULL); Session::Statistics stats = replay_session->statistics(); printf( "[ReplayStatistics] ticks %lld syscalls %lld bytes_written %lld " "microseconds %lld\n", (long long)(stats.ticks_processed - last_stats.ticks_processed), (long long)(stats.syscalls_performed - last_stats.syscalls_performed), (long long)(stats.bytes_written - last_stats.bytes_written), (long long)(to_microseconds(now) - to_microseconds(last_dump_time))); last_dump_time = now; last_stats = stats; } if (result.status == REPLAY_EXITED) { break; } assert(result.status == REPLAY_CONTINUE); assert(result.break_status.watchpoints_hit.empty()); assert(!result.break_status.breakpoint_hit); assert(cmd == RUN_SINGLESTEP || !result.break_status.singlestep_complete); } LOG(info) << ("Replayer successfully finished."); } /* Handling ctrl-C during replay: * We want the entire group of processes to remain a single process group * since that allows shell job control to work best. * We want ctrl-C to not reach tracees, because that would disturb replay. * That's taken care of by Task::set_up_process. * We allow terminal SIGINT to go directly to the parent and the child (rr). * rr's SIGINT handler |handle_SIGINT_in_child| just interrupts the replay * if we're in the process of replaying to a target event, otherwise it * does nothing. * Before the parent execs gdb, its SIGINT handler does nothing. After exec, * the signal handler is reset to default so gdb behaves as normal (which is * why we use a signal handler instead of SIG_IGN). */ static void handle_SIGINT_in_parent(int sig) { assert(sig == SIGINT); // Just ignore it. 
} static GdbServer* server_ptr = nullptr; static void handle_SIGINT_in_child(int sig) { assert(sig == SIGINT); if (server_ptr) { server_ptr->interrupt_replay_to_target(); } } static int replay(const string& trace_dir, const ReplayFlags& flags) { GdbServer::Target target; switch (flags.process_created_how) { case ReplayFlags::CREATED_EXEC: target.pid = flags.target_process; target.require_exec = true; break; case ReplayFlags::CREATED_FORK: target.pid = flags.target_process; target.require_exec = false; break; case ReplayFlags::CREATED_NONE: break; } target.event = flags.goto_event; // If we're not going to autolaunch the debugger, don't go // through the rigamarole to set that up. All it does is // complicate the process tree and confuse users. if (flags.dont_launch_debugger) { if (target.event == numeric_limits::max()) { serve_replay_no_debugger(trace_dir, flags); } else { auto session = ReplaySession::create(trace_dir); GdbServer::ConnectionFlags conn_flags; conn_flags.dbg_port = flags.dbg_port; GdbServer(session, session_flags(flags), target).serve_replay(conn_flags); } return 0; } int debugger_params_pipe[2]; if (pipe2(debugger_params_pipe, O_CLOEXEC)) { FATAL() << "Couldn't open debugger params pipe."; } if (0 == (waiting_for_child = fork())) { // Ensure only the parent has the read end of the pipe open. Then if // the parent dies, our writes to the pipe will error out. close(debugger_params_pipe[0]); ScopedFd debugger_params_write_pipe(debugger_params_pipe[1]); auto session = ReplaySession::create(trace_dir); GdbServer::ConnectionFlags conn_flags; conn_flags.dbg_port = flags.dbg_port; conn_flags.debugger_params_write_pipe = &debugger_params_write_pipe; GdbServer server(session, session_flags(flags), target); server_ptr = &server; struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_flags = SA_RESTART; sa.sa_handler = handle_SIGINT_in_child; if (sigaction(SIGINT, &sa, nullptr)) { FATAL() << "Couldn't set sigaction for SIGINT."; } server.serve_replay(conn_flags); return 0; } // Ensure only the child has the write end of the pipe open. Then if // the child dies, our reads from the pipe will return EOF. close(debugger_params_pipe[1]); LOG(debug) << getpid() << ": forked debugger server " << waiting_for_child; struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_flags = SA_RESTART; sa.sa_handler = handle_SIGINT_in_parent; if (sigaction(SIGINT, &sa, nullptr)) { FATAL() << "Couldn't set sigaction for SIGINT."; } { ScopedFd params_pipe_read_fd(debugger_params_pipe[0]); GdbServer::launch_gdb(params_pipe_read_fd, flags.gdb_command_file_path, flags.gdb_binary_file_path); } // Child must have died before we were able to get debugger parameters // and exec gdb. Exit with the exit status of the child. while (true) { int status; int ret = waitpid(waiting_for_child, &status, 0); int err = errno; LOG(debug) << getpid() << ": waitpid(" << waiting_for_child << ") returned " << strerror(err) << "(" << err << "); status:" << HEX(status); if (waiting_for_child != ret) { if (EINTR == err) { continue; } FATAL() << getpid() << ": waitpid(" << waiting_for_child << ") failed"; } if (WIFEXITED(status) || WIFSIGNALED(status)) { LOG(info) << ("Debugger server died. Exiting."); exit(WIFEXITED(status) ? WEXITSTATUS(status) : 1); } } return 0; } int ReplayCommand::run(std::vector& args) { if (getenv("RUNNING_UNDER_RR")) { fprintf(stderr, "rr: cannot run rr replay under rr. 
Exiting.\n"); return 1; } bool found_dir = false; string trace_dir; ReplayFlags flags; while (!args.empty()) { if (parse_replay_arg(args, flags)) { continue; } if (!found_dir && parse_optional_trace_dir(args, &trace_dir)) { found_dir = true; continue; } print_help(stderr); return 1; } if (!flags.target_command.empty()) { flags.target_process = find_pid_for_command(trace_dir, flags.target_command); if (flags.target_process <= 0) { fprintf(stderr, "No process '%s' found. Try 'rr ps'.\n", flags.target_command.c_str()); return 2; } } if (flags.process_created_how != ReplayFlags::CREATED_NONE) { if (!pid_exists(trace_dir, flags.target_process)) { fprintf(stderr, "No process %d found in trace. Try 'rr ps'.\n", flags.target_process); return 2; } if (flags.process_created_how == ReplayFlags::CREATED_EXEC && !pid_execs(trace_dir, flags.target_process)) { fprintf(stderr, "Process %d never exec()ed. Try 'rr ps', or use " "'-f'.\n", flags.target_process); return 2; } } assert_prerequisites(); check_performance_settings(); return replay(trace_dir, flags); } rr-4.1.0/src/ReplaySession.cc000066400000000000000000001444131265436462100160550ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "ReplaySession" #define USE_BREAKPOINT_TARGET 1 #include "ReplaySession.h" #include #include #include #include "AutoRemoteSyscalls.h" #include "fast_forward.h" #include "kernel_metadata.h" #include "log.h" #include "replay_syscall.h" #include "task.h" #include "util.h" using namespace rr; using namespace std; /* Why a skid region? Interrupts generated by perf counters don't * fire at exactly the programmed point (as of 2013 kernel/HW); * there's a variable slack region, which is technically unbounded. * This means that an interrupt programmed for retired branch k might * fire at |k + 50|, for example. To counteract the slack, we program * interrupts just short of our target, by the |SKID_SIZE| region * below, and then more slowly advance to the real target. * * How was this magic number determined? Trial and error: we want it * to be as small as possible for efficiency, but not so small that * overshoots are observed. If all other possible causes of overshoot * have been ruled out, like memory divergence, then you'll know that * this magic number needs to be increased if the following symptom is * observed during replay. Running with DEBUGLOG enabled (see above), * a sequence of log messages like the following will appear * * 1. programming interrupt for [target - SKID_SIZE] ticks * 2. Error: Replay diverged. Dumping register comparison. * 3. Error: [list of divergent registers; arbitrary] * 4. Error: overshot target ticks=[target] by [i] * * The key is that no other replayer log messages occur between (1) * and (2). This spew means that the replayer programmed an interrupt * for ticks=[target-SKID_SIZE], but the tracee was actually interrupted * at ticks=[target+i]. And that in turn means that the kernel/HW * skidded too far past the programmed target for rr to handle it. * * If that occurs, the SKID_SIZE needs to be increased by at least * [i]. * * NB: there are probably deeper reasons for the target slack that * could perhaps let it be deduced instead of arrived at empirically; * perhaps pipeline depth and things of that nature are involved. But * those reasons if they exit are currently not understood. 
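 *
 * A concrete illustration (numbers invented for exposition): with
 * SKID_SIZE = 70 and a target of ticks=10000, rr programs the interrupt
 * for 9930 ticks. If the hardware actually stops the tracee at, say,
 * 9958 ticks, the remaining 42 ticks are covered by the slower
 * breakpoint/single-step phase in advance_to() below; only a stop past
 * 10000 ticks would trip the "overshot target" assertion.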
 */
static const int SKID_SIZE = 70;

static void debug_memory(Task* t) {
  if (should_dump_memory(t, t->current_trace_frame())) {
    dump_process_memory(t, t->current_trace_frame().time(), "rep");
  }
  if (t->session().can_validate() &&
      should_checksum(t, t->current_trace_frame())) {
    /* Validate the checksum we computed during the
     * recording phase. */
    validate_process_memory(t, t->current_trace_frame().time());
  }
}

ReplaySession::~ReplaySession() {
  // We won't permanently leak any OS resources by not ensuring
  // we've cleaned up here, but sessions can be created and
  // destroyed many times, and we don't want to temporarily hog
  // resources.
  kill_all_tasks();
  assert(task_map.empty() && vm_map.empty());
  gc_emufs();
  assert(emufs().size() == 0);
}

ReplaySession::shr_ptr ReplaySession::clone() {
  LOG(debug) << "Deepforking ReplaySession " << this << " ...";

  finish_initializing();

  shr_ptr session(new ReplaySession(*this));
  LOG(debug) << " deepfork session is " << session.get();

  copy_state_to(*session, session->emufs());

  return session;
}

/**
 * Return true if it's possible/meaningful to make a checkpoint at the
 * |frame| that |t| will replay.
 */
static bool can_checkpoint_at(Task* t, const TraceFrame& frame) {
  const Event& ev = frame.event();
  if (ev.has_ticks_slop()) {
    return false;
  }
  switch (ev.type()) {
    case EV_EXIT:
    case EV_UNSTABLE_EXIT:
    // At exits, we can't clone the exiting tasks, so
    // don't even bother trying to checkpoint.
    case EV_SYSCALLBUF_RESET:
    // RESETs are usually inserted in between syscall
    // entry/exit. Do not attempt to checkpoint at
    // RESETs. Users would never want to do that anyway.
    case EV_TRACE_TERMINATION:
      // There's nothing to checkpoint at the end of an
      // early-terminated trace.
      return false;
    default:
      return true;
  }
}

bool ReplaySession::can_clone() {
  finish_initializing();

  Task* t = current_task();
  return t && can_validate() && can_checkpoint_at(t, current_trace_frame());
}

DiversionSession::shr_ptr ReplaySession::clone_diversion() {
  finish_initializing();

  LOG(debug) << "Deepforking ReplaySession " << this
             << " to DiversionSession...";

  DiversionSession::shr_ptr session(new DiversionSession(*this));
  LOG(debug) << " deepfork session is " << session.get();

  copy_state_to(*session, session->emufs());
  session->finish_initializing();

  return session;
}

void ReplaySession::gc_emufs() { emu_fs->gc(*this); }

void ReplaySession::maybe_gc_emufs(SupportedArch arch, int syscallno) {
  if (is_close_syscall(syscallno, arch) ||
      is_munmap_syscall(syscallno, arch)) {
    gc_emufs();
  }
}

/*static*/ ReplaySession::shr_ptr ReplaySession::create(const string& dir) {
  shr_ptr session(new ReplaySession(dir));

  // Because we execvpe() the tracee, we must ensure that $PATH
  // is the same as in recording so that libc searches paths in
  // the same order. So copy that over now.
  //
  // And because we use execvpe(), the exec'd tracee will start
  // with a fresh environment guaranteed to be the same as in
  // replay, so we don't have to worry about any mutation here
  // affecting post-exec execution.
  for (auto& e : session->trace_in.initial_envp()) {
    if (e.find("PATH=") == 0) {
      // NB: intentionally leaking this string.
putenv(strdup(e.c_str())); } } Task* t = Task::spawn(*session, session->trace_in, session->trace_reader().peek_frame().tid()); session->on_create(t); return session; } void ReplaySession::advance_to_next_trace_frame(TraceFrame::Time stop_at_time) { if (trace_in.at_end()) { return; } trace_frame = trace_in.read_frame(); } bool ReplaySession::is_ignored_signal(int sig) { switch (sig) { // TIME_SLICE_SIGNALs can be queued but not delivered before we stop // execution for some other reason. Ignore them. case PerfCounters::TIME_SLICE_SIGNAL: return true; default: return false; } } static bool compute_ticks_request( Task* t, const ReplaySession::StepConstraints& constraints, TicksRequest* ticks_request) { *ticks_request = RESUME_UNLIMITED_TICKS; if (constraints.ticks_target > 0) { Ticks ticks_period = constraints.ticks_target - SKID_SIZE - t->tick_count(); if (ticks_period <= 0) { // Behave as if we actually executed something. Callers assume we did. t->clear_wait_status(); return false; } *ticks_request = (TicksRequest)ticks_period; } return true; } /** * Continue until reaching either the "entry" of an emulated syscall, * or the entry or exit of an executed syscall. |emu| is nonzero when * we're emulating the syscall. Return COMPLETE when the next syscall * boundary is reached, or INCOMPLETE if advancing to the boundary was * interrupted by an unknown trap. * When |syscall_trace_frame| is non-null, we continue to the syscall by * setting a breakpoint instead of running until we execute a system * call instruction. In that case we will not actually enter the kernel. */ Completion ReplaySession::cont_syscall_boundary( Task* t, const StepConstraints& constraints) { TicksRequest ticks_request; if (!compute_ticks_request(t, constraints, &ticks_request)) { return INCOMPLETE; } if (constraints.command == RUN_SINGLESTEP_FAST_FORWARD) { // ignore ticks_period. We can't add more than one tick during a // fast_forward so it doesn't matter. did_fast_forward |= fast_forward_through_instruction( t, RESUME_SYSEMU_SINGLESTEP, constraints.stop_before_states); } else { ResumeRequest resume_how = constraints.is_singlestep() ? RESUME_SYSEMU_SINGLESTEP : RESUME_SYSEMU; t->resume_execution(resume_how, RESUME_WAIT, ticks_request); } if (t->pending_sig() == PerfCounters::TIME_SLICE_SIGNAL) { // This would normally be triggered by constraints.ticks_target but it's // also possible to get stray signals here. return INCOMPLETE; } if (is_ignored_signal(t->pending_sig())) { return cont_syscall_boundary(t, constraints); } if (SIGTRAP == t->pending_sig()) { return INCOMPLETE; } ASSERT(t, !t->pending_sig()) << "Replay got unrecorded signal " << t->pending_sig() << " (" << signal_name(t->pending_sig()) << ")"; return COMPLETE; } /** * Advance to the next syscall entry (or virtual entry) according to * |step|. Return COMPLETE if successful, or INCOMPLETE if an unhandled trap * occurred. */ Completion ReplaySession::enter_syscall(Task* t, const StepConstraints& constraints) { bool use_breakpoint_optimization = false; remote_code_ptr syscall_instruction; if (can_validate()) { syscall_instruction = current_trace_frame().regs().ip().decrement_by_syscall_insn_length( t->arch()); // Skip this optimization if we can't set the breakpoint, or if it's // in writeable or shared memory, since in those cases it could be // overwritten by the tracee. It could even be dynamically generated and // not generated yet. 
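    // (When the optimization applies, the tracee runs at full speed into an
    // int3 planted on the syscall instruction, and the code below then fakes
    // the kernel's entry effects: ip advanced past the syscall insn,
    // orig_*ax saved, and *ax set to -ENOSYS, mirroring a real ptrace
    // syscall-entry stop.)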
if (t->vm()->is_breakpoint_in_private_read_only_memory( syscall_instruction) && t->vm()->add_breakpoint(syscall_instruction, TRAP_BKPT_INTERNAL)) { use_breakpoint_optimization = true; } } if (cont_syscall_boundary(t, constraints) == INCOMPLETE) { bool reached_target = use_breakpoint_optimization && SIGTRAP == t->pending_sig() && t->ip().decrement_by_bkpt_insn_length(t->arch()) == syscall_instruction && t->vm()->get_breakpoint_type_at_addr(syscall_instruction) == TRAP_BKPT_INTERNAL; if (reached_target) { // Emulate syscall state change Registers r = t->regs(); r.set_ip(syscall_instruction.increment_by_syscall_insn_length(t->arch())); r.set_original_syscallno(r.syscallno()); r.set_syscall_result(-ENOSYS); t->emulate_syscall_entry(r); t->validate_regs(); } if (use_breakpoint_optimization) { t->vm()->remove_breakpoint(syscall_instruction, TRAP_BKPT_INTERNAL); } if (!reached_target) { return INCOMPLETE; } } else { // If we use the breakpoint optimization, we must get a SIGTRAP before // reaching a syscall, so cont_syscall_boundary must return INCOMPLETE. ASSERT(t, !use_breakpoint_optimization); t->validate_regs(); t->finish_emulated_syscall(); } return COMPLETE; } /** * Advance past the reti (or virtual reti) according to |step|. * Return COMPLETE if successful, or INCOMPLETE if an unhandled trap occurred. */ Completion ReplaySession::exit_syscall(Task* t, const StepConstraints& constraints) { t->on_syscall_exit(current_step.syscall.number, current_trace_frame().regs()); t->apply_all_data_records_from_trace(); t->set_return_value_from_trace(); uint32_t flags = 0; if (t->arch() == SupportedArch::x86 && (X86Arch::pwrite64 == current_step.syscall.number || X86Arch::pread64 == current_step.syscall.number)) { flags |= Task::IGNORE_ESI; } t->validate_regs(flags); return COMPLETE; } void ReplaySession::check_pending_sig(Task* t) { ASSERT(t, 0 < t->pending_sig()) << "Replaying `" << trace_frame.event() << "': expecting tracee signal or trap, but instead at `" << t->syscall_name(t->regs().original_syscallno()) << "' (ticks: " << t->tick_count() << ")"; } /** * Advance |t| to the next signal or trap. If |stepi| is |SINGLESTEP|, * then execution resumes by single-stepping. Otherwise it continues * normally. |t->pending_sig()| contains any pending signal. * * Default |resume_how| is RESUME_SYSCALL for error checking: * since the next event is supposed to be a signal, * entering a syscall here means divergence. There * shouldn't be any straight-line execution overhead * for SYSCALL vs. CONT, so the difference in cost * should be neglible. * * Some callers pass RESUME_CONT because they want to execute any syscalls * encountered. */ void ReplaySession::continue_or_step(Task* t, const StepConstraints& constraints, TicksRequest tick_request, ResumeRequest resume_how) { if (constraints.command == RUN_SINGLESTEP) { t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT, tick_request); } else if (constraints.command == RUN_SINGLESTEP_FAST_FORWARD) { did_fast_forward |= fast_forward_through_instruction( t, RESUME_SINGLESTEP, constraints.stop_before_states); } else { t->resume_execution(resume_how, RESUME_WAIT, tick_request); } check_pending_sig(t); } /** * Return nonzero if |t| was stopped for a breakpoint trap (int3), * as opposed to a trace trap. Return zero in the latter case. */ static bool is_breakpoint_trap(Task* t) { const siginfo_t& si = t->get_siginfo(); assert(SIGTRAP == si.si_signo); /* XXX unable to find docs on which of these "should" be * right. 
The SI_KERNEL code is seen in the int3 test, so we * at least need to handle that. */ return SI_KERNEL == si.si_code || TRAP_BRKPT == si.si_code; } /** * Return one of the (non-zero) enumerated TRAP_* debugger-trap types * above if the SIGTRAP generated by the child is intended for the * debugger, or zero if it's meant for rr internally. * * NB: calling this function while advancing the ticks counter through hpc * interrupts when emulating asynchronous signal delivery *will* * result in bad results. Don't call this function from there; it's * not necessary. */ enum ExecStateType { UNKNOWN, NOT_AT_TARGET, AT_TARGET }; TrapType ReplaySession::compute_trap_type(Task* t, int target_sig, SignalDeterministic deterministic, ExecStateType exec_state, const StepConstraints& constraints) { TrapType trap_type; assert(SIGTRAP == t->pending_sig()); /* We're not replaying a trap, and it was clearly raised on * behalf of the debugger. (The debugger will verify * that.) */ if (SIGTRAP != target_sig /* Replay of deterministic signals never internally * single-steps or sets internal breakpoints. */ && (DETERMINISTIC_SIG == deterministic /* Replay of async signals will sometimes internally * single-step when advancing to an execution target, * so the trap was only clearly for the debugger if * the debugger was requesting single-stepping. */ || (constraints.is_singlestep() && NOT_AT_TARGET == exec_state))) { return constraints.is_singlestep() ? TRAP_STEPI : TRAP_BKPT_USER; } /* We're trying to replay a deterministic SIGTRAP, or we're * replaying an async signal. */ trap_type = t->vm()->get_breakpoint_type_for_retired_insn(t->ip()); if (TRAP_BKPT_USER == trap_type || TRAP_BKPT_INTERNAL == trap_type) { assert(is_breakpoint_trap(t)); return trap_type; } if (is_breakpoint_trap(t)) { /* We successfully replayed a recorded deterministic * SIGTRAP. (Because it must have been raised by an * |int3|, but not one we injected.) Not for the * debugger, although we'll end up notifying it * anyway. */ assert(DETERMINISTIC_SIG == deterministic); return TRAP_NONE; } if (DETERMINISTIC_SIG == deterministic) { /* If the delivery of SIGTRAP is supposed to be * deterministic and we didn't just retire an |int 3| * and this wasn't a breakpoint, we must have been * single stepping. So definitely for the * debugger. */ assert(constraints.is_singlestep()); return TRAP_STEPI; } /* We're replaying an async signal. */ if (AT_TARGET == exec_state) { /* If we're at the target of the async signal * delivery, prefer delivering the signal to retiring * a possible debugger single-step; we'll notify the * debugger anyway. */ return TRAP_NONE; } /* Otherwise, we're not at the execution target, so may have * been internally single-stepping. We'll notify the debugger * if it was also requesting single-stepping. The debugger * won't care about the rr-internal trap if it wasn't * requesting single-stepping. */ return constraints.is_singlestep() ? TRAP_STEPI : TRAP_NONE; } /** * Shortcut for callers that don't care about internal breakpoints. * Return nonzero if |t|'s |pending_sig()| is for the debugger, zero otherwise. 
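 * (Implementation-wise a thin wrapper: it computes the full TrapType and
 * treats anything other than TRAP_NONE as debugger-bound, asserting that
 * internal breakpoints never reach this path.)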
*/ bool ReplaySession::is_debugger_trap(Task* t, int target_sig, SignalDeterministic deterministic, ExecStateType exec_state, const StepConstraints& constraints) { TrapType type = compute_trap_type(t, target_sig, deterministic, exec_state, constraints); assert(TRAP_BKPT_INTERNAL != type); return TRAP_NONE != type; } static void guard_overshoot(Task* t, const Registers& target_regs, Ticks target_ticks, Ticks remaining_ticks, const Registers* closest_matching_regs) { if (remaining_ticks < 0) { remote_code_ptr target_ip = target_regs.ip(); /* Cover up the internal breakpoint that we may have * set, and restore the tracee's $ip to what it would * have been had it not hit the breakpoint (if it did * hit the breakpoint).*/ t->vm()->remove_breakpoint(target_ip, TRAP_BKPT_INTERNAL); if (t->regs().ip() == target_ip.increment_by_bkpt_insn_length(t->arch())) { t->move_ip_before_breakpoint(); } if (closest_matching_regs) { LOG(error) << "Replay diverged; target registers at ticks target mismatched: "; Registers::compare_register_files(t, "rep overshoot", t->regs(), "rec", *closest_matching_regs, LOG_MISMATCHES); } else { LOG(error) << "Replay diverged; target registers mismatched: "; Registers::compare_register_files(t, "rep overshoot", t->regs(), "rec", target_regs, LOG_MISMATCHES); } ASSERT(t, false) << "overshot target ticks=" << target_ticks << " by " << -remaining_ticks; } } static void guard_unexpected_signal(Task* t) { if (ReplaySession::is_ignored_signal(t->pending_sig()) || SIGTRAP == t->pending_sig()) { return; } Event ev; if (t->pending_sig()) { ev = SignalEvent(t->pending_sig(), NONDETERMINISTIC_SIG, t->arch()); } else { ev = SyscallEvent(max(0L, (long)t->regs().original_syscallno()), t->arch()); } ASSERT(t, false) << "Replay got unrecorded event " << ev << " while awaiting signal"; } static bool is_same_execution_point(Task* t, const Registers& rec_regs, Ticks ticks_left, Registers* mismatched_regs, const Registers** mismatched_regs_ptr) { MismatchBehavior behavior = #ifdef DEBUGTAG LOG_MISMATCHES #else EXPECT_MISMATCHES #endif ; if (ticks_left != 0) { LOG(debug) << " not same execution point: " << ticks_left << " ticks left (@" << rec_regs.ip() << ")"; #ifdef DEBUGTAG Registers::compare_register_files(t, "(rep)", t->regs(), "(rec)", rec_regs, LOG_MISMATCHES); #endif return false; } if (!Registers::compare_register_files(t, "rep", t->regs(), "rec", rec_regs, behavior)) { LOG(debug) << " not same execution point: regs differ (@" << rec_regs.ip() << ")"; *mismatched_regs = t->regs(); *mismatched_regs_ptr = mismatched_regs; return false; } LOG(debug) << " same execution point"; return true; } /** * Run execution forwards for |t| until |ticks| is reached, and the $ip * reaches the recorded $ip. Return COMPLETE if successful or INCOMPLETE if an * unhandled interrupt occurred. |sig| is the pending signal to be * delivered; it's only used to distinguish debugger-related traps * from traps related to replaying execution. |ticks| is an inout param * that will be decremented by branches retired during this attempted * step. */ Completion ReplaySession::advance_to(Task* t, const Registers& regs, int sig, const StepConstraints& constraints, Ticks ticks) { remote_code_ptr ip = regs.ip(); Ticks ticks_left; bool did_set_internal_breakpoint = false; /* Step 1: advance to the target ticks (minus a slack region) as * quickly as possible by programming the hpc. 
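 * (Sketch of this phase: while ticks_left > 2 * SKID_SIZE, program the
 * counter for ticks_left - SKID_SIZE and resume; any SIGTRAP seen here
 * must belong to the debugger, since no internal breakpoints are set in
 * this phase.)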
*/ ticks_left = ticks - t->tick_count(); LOG(debug) << "advancing " << ticks_left << " ticks to reach " << ticks << "/" << ip; /* XXX should we only do this if (ticks > 10000)? */ while (ticks_left - SKID_SIZE > SKID_SIZE) { LOG(debug) << " programming interrupt for " << (ticks_left - SKID_SIZE) << " ticks"; continue_or_step(t, constraints, (TicksRequest)(ticks_left - SKID_SIZE)); guard_unexpected_signal(t); ticks_left = ticks - t->tick_count(); if (SIGTRAP == t->pending_sig()) { /* We proved we're not at the execution * target, and we haven't set any internal * breakpoints, and we're not temporarily * internally single-stepping, so we must have * hit a debugger breakpoint or the debugger * was single-stepping the tracee. (The * debugging code will verify that.) */ return INCOMPLETE; } } guard_overshoot(t, regs, ticks, ticks_left, NULL); /* True when our advancing has triggered a tracee SIGTRAP that needs to * be dealt with. */ bool pending_SIGTRAP = false; /* Step 2: more slowly, find our way to the target ticks and * execution point. We set an internal breakpoint on the * target $ip and then resume execution. When that *internal* * breakpoint is hit (i.e., not one incidentally also set on * that $ip by the debugger), we check again if we're at the * target ticks and execution point. If not, we temporarily * remove the breakpoint, single-step over the insn, and * repeat. * * What we really want to do is set a (precise) * retired-instruction interrupt and do away with all this * cruft. */ Registers mismatched_regs; const Registers* mismatched_regs_ptr = NULL; while (true) { /* Invariants here are * o ticks_left is up-to-date * o ticks_left >= 0 * * Possible state of the execution of |t| * 0. at a debugger trap (breakpoint or stepi) * 1. at an internal breakpoint * 2. at the execution target * 3. not at the execution target, but incidentally * at the target $ip * 4. otherwise not at the execution target * * Determining whether we're at a debugger trap is * surprisingly complicated, but we delegate the work * to |compute_debugger_trap()|. The rest can be * straightforwardly computed with ticks value and * registers. */ bool at_target = is_same_execution_point( t, regs, ticks_left, &mismatched_regs, &mismatched_regs_ptr); if (pending_SIGTRAP) { TrapType trap_type = compute_trap_type(t, sig, NONDETERMINISTIC_SIG, at_target ? AT_TARGET : NOT_AT_TARGET, constraints); switch (trap_type) { case TRAP_BKPT_USER: case TRAP_STEPI: /* Case (0) above: interrupt for the * debugger. */ LOG(debug) << " trap was debugger interrupt " << trap_type; if (did_set_internal_breakpoint) { t->vm()->remove_breakpoint(ip, TRAP_BKPT_INTERNAL); did_set_internal_breakpoint = false; } return INCOMPLETE; case TRAP_BKPT_INTERNAL: { /* Case (1) above: cover the tracks of * our internal breakpoint, and go * check again if we're at the * target. */ LOG(debug) << " trap was for target $ip"; /* (The breakpoint would have trapped * at the $ip one byte beyond the * target.) */ assert(!at_target); pending_SIGTRAP = false; t->move_ip_before_breakpoint(); /* We just backed up the $ip, but * rewound it over an |int $3| * instruction, which couldn't have * retired a branch. So we don't need * to adjust |ticks_count()|. */ continue; } case TRAP_NONE: /* Otherwise, we must have been forced * to single-step because the tracee's * $ip was incidentally the same as * the target. Unfortunately, it's * awkward to assert that here, so we * don't yet. TODO. 
 */
        LOG(debug) << " (SIGTRAP; stepi'd target $ip)";
        break;
    }
  }

  /* We had to keep the internal breakpoint set (if it
   * was when we entered the loop) for the checks above.
   * But now we're either done (at the target) or about
   * to resume execution in one of a variety of ways,
   * and it's simpler to start out knowing that the
   * breakpoint isn't set. */
  if (did_set_internal_breakpoint) {
    t->vm()->remove_breakpoint(ip, TRAP_BKPT_INTERNAL);
    did_set_internal_breakpoint = false;
  }

  if (at_target) {
    /* Case (2) above: done. */
    return COMPLETE;
  }

  /* At this point, we've proven that we're not at the
   * target execution point, and we've ensured the
   * internal breakpoint is unset. */
  if (USE_BREAKPOINT_TARGET && regs.ip() != t->regs().ip()) {
    /* Case (4) above: set a breakpoint on the
     * target $ip and PTRACE_CONT in an attempt to
     * execute as many non-trapped insns as we
     * can. (Unless the debugger is stepping, of
     * course.) Trapping and checking
     * are-we-at-target is slow. It bears
     * repeating that the ideal implementation
     * would be programming a precise counter
     * interrupt (insns-retired best of all), but
     * we're forced to be conservative by observed
     * imprecise counters. This should still be
     * no slower than single-stepping our way to
     * the target execution point. */
    LOG(debug) << " breaking on target $ip";
    t->vm()->add_breakpoint(ip, TRAP_BKPT_INTERNAL);
    did_set_internal_breakpoint = true;
    continue_or_step(t, constraints, RESUME_UNLIMITED_TICKS);
  } else {
    /* Case (3) above: we can't put a breakpoint
     * on the $ip, because resuming execution
     * would just trap and we'd be back where we
     * started. Single-step or fast-forward past it. */
    LOG(debug) << " (fast-forwarding over target $ip)";
    if (constraints.command == RUN_SINGLESTEP) {
      continue_or_step(t, constraints, RESUME_UNLIMITED_TICKS);
    } else {
      vector<const Registers*> states = constraints.stop_before_states;
      // This state may not be relevant if we don't have the correct tick
      // count yet. But it doesn't hurt to push it on anyway.
      states.push_back(&regs);
      did_fast_forward |=
          fast_forward_through_instruction(t, RESUME_SINGLESTEP, states);
      check_pending_sig(t);
    }
  }

  pending_SIGTRAP = SIGTRAP == t->pending_sig();

  /* Maintain the "'ticks_left'-is-up-to-date"
   * invariant. */
  ticks_left = ticks - t->tick_count();

  /* Sometimes (e.g. in the ptrace_signal_32 test), we're in almost
   * the correct state when we enter |advance_to|, except that exotic
   * registers (i.e. segment registers) need to be normalized by the kernel
   * by continuing and hitting a deterministic signal without actually
   * advancing execution. So we allow |advance_to| to proceed and actually
   * reach the desired state.
   */
  if (!is_same_execution_point(t, regs, ticks_left, &mismatched_regs,
                               &mismatched_regs_ptr)) {
    guard_unexpected_signal(t);
  }
  guard_overshoot(t, regs, ticks, ticks_left, mismatched_regs_ptr);
  }
}

static bool is_fatal_default_action(int sig) {
  signal_action action = default_action(sig);
  return action == DUMP_CORE || action == TERMINATE;
}

/**
 * Emulates delivery of |sig| to |oldtask|. Returns INCOMPLETE if
 * emulation was interrupted, COMPLETE if completed.
 */
Completion ReplaySession::emulate_signal_delivery(
    Task* oldtask, int sig, const StepConstraints& constraints) {
  Task* t = current_task();
  if (!t) {
    // Trace terminated abnormally. We'll pop out to code
    // that knows what to do.
return INCOMPLETE; } ASSERT(oldtask, t == oldtask) << "emulate_signal_delivery changed task"; const Event& ev = trace_frame.event(); ASSERT(t, ev.type() == EV_SIGNAL_DELIVERY || ev.type() == EV_SIGNAL_HANDLER) << "Unexpected signal disposition"; // Entering a signal handler seems to clear FP/SSE registers for some // reason. So we saved those cleared values, and now we restore that // state so they're cleared during replay. if (ev.type() == EV_SIGNAL_HANDLER) { t->set_extra_regs(trace_frame.extra_regs()); } /* Restore the signal-hander frame data, if there was one. */ SignalDeterministic deterministic = ev.Signal().deterministic; bool restored_sighandler_frame = 0 < t->set_data_from_trace(); if (restored_sighandler_frame) { t->push_event(SignalEvent(sig, deterministic, t->arch())); t->ev().transform(EV_SIGNAL_DELIVERY); LOG(debug) << "--> restoring sighandler frame for " << signal_name(sig); t->ev().transform(EV_SIGNAL_HANDLER); } // Note that fatal signals are not actually injected into the task! // This is very important; we must never actually inject fatal signals // into a task. All replay task death must go through exit_task. /* If this signal had a user handler, and we just set up the * callframe, and we need to restore the $sp for continued * execution. */ t->set_regs(trace_frame.regs()); t->validate_regs(); return COMPLETE; } void ReplaySession::check_ticks_consistency(Task* t, const Event& ev) { if (!can_validate()) { return; } Ticks ticks_now = t->tick_count(); Ticks trace_ticks = trace_frame.ticks(); ASSERT(t, ticks_now == trace_ticks) << "ticks mismatch for '" << ev << "'; expected " << trace_ticks << ", got " << ticks_now << ""; } static bool treat_signal_event_as_deterministic(const SignalEvent& ev) { return ev.deterministic == DETERMINISTIC_SIG; } /** * Advance to the delivery of the deterministic signal |sig| and * update registers to what was recorded. Return COMPLETE if successful or * INCOMPLETE if an unhandled interrupt occurred. */ Completion ReplaySession::emulate_deterministic_signal( Task* t, int sig, const StepConstraints& constraints) { if (t->regs().matches(trace_frame.regs()) && t->tick_count() == trace_frame.ticks()) { // We're already at the target. This can happen when multiple signals // are delivered with no intervening execution. return COMPLETE; } continue_or_step(t, constraints, RESUME_UNLIMITED_TICKS); if (is_ignored_signal(t->pending_sig())) { return emulate_deterministic_signal(t, sig, constraints); } if (SIGTRAP == t->pending_sig() && is_debugger_trap(t, sig, DETERMINISTIC_SIG, UNKNOWN, constraints)) { return INCOMPLETE; } ASSERT(t, t->pending_sig() == sig) << "Replay got unrecorded signal " << t->pending_sig() << " (expecting " << sig << ")"; const Event& ev = trace_frame.event(); check_ticks_consistency(t, ev); if (EV_SEGV_RDTSC == ev.type()) { t->set_regs(trace_frame.regs()); } return COMPLETE; } /** * Run execution forwards for |t| until |t->trace.ticks| is reached, * and the $ip reaches the recorded $ip. After that, deliver |sig| if * nonzero. Return COMPLETE if successful or INCOMPLETE if an unhandled * interrupt occurred. */ Completion ReplaySession::emulate_async_signal( Task* t, const StepConstraints& constraints, Ticks ticks) { return advance_to(t, trace_frame.regs(), 0, constraints, ticks); } /** * Restore the recorded syscallbuf data to the tracee, preparing the * tracee for replaying the records. Return the number of record * bytes and a pointer to the first record through outparams. 
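 * (Layout being restored: a syscallbuf_hdr immediately followed by
 * num_rec_bytes of packed syscallbuf_record entries; the stop breakpoint
 * computed below lands num_rec_bytes / 8 entries into the tracee's
 * stopping_breakpoint_table.)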
*/ void ReplaySession::prepare_syscallbuf_records(Task* t) { // Read the recorded syscall buffer back into the buffer // region. auto buf = t->trace_reader().read_raw_data(); ASSERT(t, buf.data.size() >= sizeof(struct syscallbuf_hdr)); ASSERT(t, buf.data.size() <= SYSCALLBUF_BUFFER_SIZE); ASSERT(t, buf.addr == t->syscallbuf_child.cast<void>()); struct syscallbuf_hdr recorded_hdr; memcpy(&recorded_hdr, buf.data.data(), sizeof(struct syscallbuf_hdr)); // Don't overwrite t->syscallbuf_hdr. That needs to keep tracking the current // syscallbuf state. memcpy(t->syscallbuf_hdr + 1, buf.data.data() + sizeof(struct syscallbuf_hdr), buf.data.size() - sizeof(struct syscallbuf_hdr)); ASSERT(t, recorded_hdr.num_rec_bytes + sizeof(struct syscallbuf_hdr) <= SYSCALLBUF_BUFFER_SIZE); current_step.flush.stop_breakpoint_addr = t->stopping_breakpoint_table.to_data_ptr<void>().as_int() + (recorded_hdr.num_rec_bytes / 8) * t->stopping_breakpoint_table_entry_size; LOG(debug) << "Prepared " << (uint32_t)recorded_hdr.num_rec_bytes << " bytes of syscall records"; } /** * Replay all the syscalls recorded in the interval between |t|'s * current execution point and the next non-syscallbuf event (the one * that flushed the buffer). Return COMPLETE if successful or INCOMPLETE if an * unhandled interrupt occurred. */ Completion ReplaySession::flush_syscallbuf(Task* t, const StepConstraints& constraints) { struct syscallbuf_record* next_rec = next_record(t->syscallbuf_hdr); TicksRequest ticks_request; if (!compute_ticks_request(t, constraints, &ticks_request)) { return INCOMPLETE; } bool added = t->vm()->add_breakpoint(current_step.flush.stop_breakpoint_addr, TRAP_BKPT_INTERNAL); ASSERT(t, added); continue_or_step(t, constraints, ticks_request, RESUME_CONT); bool user_breakpoint_at_addr = t->vm()->get_breakpoint_type_at_addr( current_step.flush.stop_breakpoint_addr) != TRAP_BKPT_INTERNAL; t->vm()->remove_breakpoint(current_step.flush.stop_breakpoint_addr, TRAP_BKPT_INTERNAL); // Account for buffered syscalls just completed struct syscallbuf_record* end_rec = next_record(t->syscallbuf_hdr); while (next_rec != end_rec) { accumulate_syscall_performed(); maybe_gc_emufs(t->arch(), next_rec->syscallno); next_rec = (struct syscallbuf_record*)((uint8_t*)next_rec + stored_record_size(next_rec->size)); } if (t->pending_sig() == PerfCounters::TIME_SLICE_SIGNAL) { // This would normally be triggered by constraints.ticks_target but it's // also possible to get stray signals here. return INCOMPLETE; } if (is_ignored_signal(t->pending_sig())) { return flush_syscallbuf(t, constraints); } ASSERT(t, t->pending_sig() == SIGTRAP) << "Replay got unexpected signal (or none) " << t->pending_sig(); if (t->ip().decrement_by_bkpt_insn_length(t->arch()) == remote_code_ptr(current_step.flush.stop_breakpoint_addr) && !user_breakpoint_at_addr) { Registers r = t->regs(); r.set_ip(current_step.flush.stop_breakpoint_addr); t->set_regs(r); return COMPLETE; } return INCOMPLETE; } Completion ReplaySession::patch_next_syscall( Task* t, const StepConstraints& constraints) { if (cont_syscall_boundary(t, constraints) == INCOMPLETE) { return INCOMPLETE; } t->exit_syscall_and_prepare_restart(); // All patching effects have been recorded to the trace. // First, replay any memory mapping done by Monkeypatcher. There should be // at most one but we might as well be general.
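// Each loop iteration consumes one mapping record from the trace; // read_mapped_region reports found=false once this event's mapping // records are exhausted, which terminates the loop.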
while (true) { TraceReader::MappedData data; bool found; KernelMapping km = t->trace_reader().read_mapped_region(&data, &found); if (!found) { break; } AutoRemoteSyscalls remote(t); ASSERT(t, km.flags() & MAP_ANONYMOUS); remote.infallible_mmap_syscall(km.start(), km.size(), km.prot(), km.flags() | MAP_FIXED, -1, 0); t->vm()->map(km.start(), km.size(), km.prot(), km.flags(), 0, string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE, &km, TraceWriter::PATCH_MAPPING); } // Now replay all data records. t->apply_all_data_records_from_trace(); return COMPLETE; } /** * Return true if replaying |ev| by running |step| should result in * the target task having the same ticks value as it did during * recording. */ static bool has_deterministic_ticks(const Event& ev, const ReplayTraceStep& step) { if (ev.has_ticks_slop()) { return false; } // We won't necessarily reach the same ticks when replaying an // async signal, due to debugger interrupts and other // implementation details. This is checked in |advance_to()| // anyway. return TSTEP_PROGRAM_ASYNC_SIGNAL_INTERRUPT != step.action; } void ReplaySession::check_approaching_ticks_target( Task* t, const StepConstraints& constraints, BreakStatus& break_status) { if (constraints.ticks_target > 0) { Ticks ticks_left = constraints.ticks_target - t->tick_count(); if (ticks_left <= SKID_SIZE) { break_status.approaching_ticks_target = true; } } } Completion ReplaySession::advance_to_ticks_target( Task* t, const StepConstraints& constraints) { while (true) { TicksRequest ticks_request; if (!compute_ticks_request(t, constraints, &ticks_request)) { return INCOMPLETE; } continue_or_step(t, constraints, ticks_request); if (SIGTRAP == t->pending_sig()) { return INCOMPLETE; } } } /** * Try to execute |step|, adjusting for |constraints| if needed. Return * COMPLETE if |step| was made, or INCOMPLETE if there was a trap or |step| * needs more work. */ Completion ReplaySession::try_one_trace_step( Task* t, const StepConstraints& constraints) { if (constraints.ticks_target > 0 && !trace_frame.event().has_ticks_slop() && t->current_trace_frame().ticks() > constraints.ticks_target) { // Instead of doing this step, just advance to the ticks_target, since // that happens before this event completes. // Unfortunately we can't do this for TSTEP_FLUSH_SYSCALLBUF // because its tick count can't be trusted. // cont_syscall_boundary handles the ticks constraint for those cases. return advance_to_ticks_target(t, constraints); } switch (current_step.action) { case TSTEP_RETIRE: return COMPLETE; case TSTEP_ENTER_SYSCALL: return enter_syscall(t, constraints); case TSTEP_EXIT_SYSCALL: return exit_syscall(t, constraints); case TSTEP_DETERMINISTIC_SIGNAL: return emulate_deterministic_signal(t, current_step.target.signo, constraints); case TSTEP_PROGRAM_ASYNC_SIGNAL_INTERRUPT: return emulate_async_signal(t, constraints, current_step.target.ticks); case TSTEP_DELIVER_SIGNAL: return emulate_signal_delivery(t, current_step.target.signo, constraints); case TSTEP_FLUSH_SYSCALLBUF: return flush_syscallbuf(t, constraints); case TSTEP_PATCH_SYSCALL: return patch_next_syscall(t, constraints); case TSTEP_EXIT_TASK: return exit_task(t, constraints); default: FATAL() << "Unhandled step type " << current_step.action; return COMPLETE; } } /** * Task death during replay always goes through here (except for * Session::kill_all_tasks when we forcibly kill all tasks in the session at * once).
|exit| and |exit_group| syscalls are both emulated so the real * task doesn't die until we reach the EXIT/UNSTABLE_EXIT events in the trace. * This ensures the real tasks are alive and available as long as our Task * object exists, which simplifies code like Session cloning. * * Killing tasks with fatal signals doesn't work because a fatal signal will * try to kill all the tasks in the task group. Instead we inject an |exit| * syscall, which is apparently the only way to kill one specific thread. */ static void end_task(Task* t) { ASSERT(t, t->ptrace_event() != PTRACE_EVENT_EXIT); // Emulate what the kernel would do during a task exit. We don't let the // kernel do these during replay. The kernel would also do a FUTEX_WAKE on // this address, but we don't need to do that. if (!t->tid_addr().is_null()) { bool ok = true; // Ignore writes to invalid locations; the kernel does t->write_mem(t->tid_addr(), 0, &ok); } Registers r = t->regs(); r.set_ip(t->vm()->privileged_traced_syscall_ip()); r.set_syscallno(syscall_number_for_exit(t->arch())); t->set_regs(r); // Enter the syscall. t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS); ASSERT(t, t->pending_sig() == 0); do { // Singlestep to collect the PTRACE_EVENT_EXIT event. t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT, RESUME_NO_TICKS); } while (t->is_ptrace_seccomp_event() || ReplaySession::is_ignored_signal(t->pending_sig())); ASSERT(t, t->ptrace_event() == PTRACE_EVENT_EXIT); delete t; } Completion ReplaySession::exit_task(Task* t, const StepConstraints& constraints) { ASSERT(t, !t->seen_ptrace_exit_event); // Apply robust-futex updates captured during recording. t->apply_all_data_records_from_trace(); end_task(t); /* |t| is dead now. */ gc_emufs(); return COMPLETE; } /** * Set up rep_trace_step state in t's Session to start replaying towards * the event given by the session's current_trace_frame --- but only if * it's not already set up. */ void ReplaySession::setup_replay_one_trace_frame(Task* t) { const Event& ev = trace_frame.event(); LOG(debug) << "[line " << trace_frame.time() << "] " << t->rec_tid << ": replaying " << Event(ev) << "; state " << (ev.is_syscall_event() ? state_name(ev.Syscall().state) : " (none)"); if (t->syscallbuf_hdr) { LOG(debug) << " (syscallbufsz:" << (uint32_t)t->syscallbuf_hdr->num_rec_bytes << ", abrtcmt:" << bool(t->syscallbuf_hdr->abort_commit) << ", locked:" << bool(t->syscallbuf_hdr->locked) << ")"; } /* Ask the trace-interpretation code what to do next in order * to retire the current frame. */ memset(&current_step, 0, sizeof(current_step)); switch (ev.type()) { case EV_UNSTABLE_EXIT: case EV_EXIT: current_step.action = TSTEP_EXIT_TASK; break; case EV_SYSCALLBUF_ABORT_COMMIT: t->syscallbuf_hdr->abort_commit = 1; current_step.action = TSTEP_RETIRE; break; case EV_SYSCALLBUF_FLUSH: current_step.action = TSTEP_FLUSH_SYSCALLBUF; prepare_syscallbuf_records(t); break; case EV_SYSCALLBUF_RESET: // Reset syscallbuf_hdr->num_rec_bytes and zero out the recorded data. // Zeroing out the data is important because we only save and restore // the recorded data area when making checkpoints. We want the checkpoint // to have the same syscallbuf contents as its original, i.e. zero outside // the recorded data area. This is important because stray reads such // as those performed by return_addresses should be consistent.
t->reset_syscallbuf(); current_step.action = TSTEP_RETIRE; break; case EV_PATCH_SYSCALL: current_step.action = TSTEP_PATCH_SYSCALL; break; case EV_SCHED: current_step.action = TSTEP_PROGRAM_ASYNC_SIGNAL_INTERRUPT; current_step.target.ticks = trace_frame.ticks(); current_step.target.signo = 0; break; case EV_SEGV_RDTSC: current_step.action = TSTEP_DETERMINISTIC_SIGNAL; current_step.target.ticks = -1; current_step.target.signo = SIGSEGV; break; case EV_GROW_MAP: process_grow_map(t); current_step.action = TSTEP_RETIRE; break; case EV_INTERRUPTED_SYSCALL_NOT_RESTARTED: LOG(debug) << "  popping interrupted but not restarted " << t->ev(); t->pop_syscall_interruption(); current_step.action = TSTEP_RETIRE; break; case EV_EXIT_SIGHANDLER: LOG(debug) << "<-- sigreturn from " << t->ev(); t->pop_signal_handler(); current_step.action = TSTEP_RETIRE; break; case EV_SIGNAL: if (treat_signal_event_as_deterministic(ev.Signal())) { current_step.action = TSTEP_DETERMINISTIC_SIGNAL; current_step.target.signo = ev.Signal().siginfo.si_signo; current_step.target.ticks = -1; } else { current_step.action = TSTEP_PROGRAM_ASYNC_SIGNAL_INTERRUPT; current_step.target.signo = ev.Signal().siginfo.si_signo; current_step.target.ticks = trace_frame.ticks(); } break; case EV_SIGNAL_DELIVERY: case EV_SIGNAL_HANDLER: current_step.action = TSTEP_DELIVER_SIGNAL; current_step.target.signo = ev.Signal().siginfo.si_signo; break; case EV_SYSCALL: rep_process_syscall(t, &current_step); if (trace_frame.event().Syscall().state == EXITING_SYSCALL && current_step.action == TSTEP_RETIRE) { t->on_syscall_exit(current_step.syscall.number, trace_frame.regs()); maybe_gc_emufs(t->arch(), trace_frame.regs().syscallno()); } break; default: FATAL() << "Unexpected event " << ev; } } bool ReplaySession::next_step_is_syscall_exit(int syscallno) { return current_step.action == TSTEP_NONE && trace_frame.event().is_syscall_event() && trace_frame.event().Syscall().number == syscallno && trace_frame.event().Syscall().state == EXITING_SYSCALL; } ReplayResult ReplaySession::replay_step(const StepConstraints& constraints) { finish_initializing(); ReplayResult result(REPLAY_CONTINUE); Task* t = current_task(); if (EV_TRACE_TERMINATION == trace_frame.event().type()) { result.status = REPLAY_EXITED; return result; } /* If we restored from a checkpoint, the steps might have been * computed already in which case step.action will not be TSTEP_NONE. */ if (current_step.action == TSTEP_NONE) { setup_replay_one_trace_frame(t); if (current_step.action == TSTEP_NONE) { // Already at the destination event. advance_to_next_trace_frame(constraints.stop_at_time); } if (current_step.action == TSTEP_EXIT_TASK) { result.break_status.task = t; result.break_status.task_exit = true; } return result; } did_fast_forward = false; // Now we know |t| hasn't died, so save it in break_status. result.break_status.task = t; /* Advance towards fulfilling |current_step|. */ if (try_one_trace_step(t, constraints) == INCOMPLETE) { if (EV_TRACE_TERMINATION == trace_frame.event().type()) { // An irregular trace step had to read the // next trace frame, and that frame was an // early-termination marker. Otherwise we // would have seen the marker above. result.status = REPLAY_EXITED; return result; } // We got INCOMPLETE because there was some kind of debugger trap or // we got close to ticks_target.
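// diagnose_debugger_trap() inspects the stop to fill in BreakStatus: which // breakpoint or watchpoint fired, or whether a requested singlestep just // completed, so the caller can tell these stop reasons apart.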
result.break_status = diagnose_debugger_trap(t); ASSERT(t, !result.break_status.signal) << "Expected either SIGTRAP at $ip " << t->ip() << " or USER breakpoint just after it"; ASSERT(t, !result.break_status.singlestep_complete || constraints.is_singlestep()); check_approaching_ticks_target(t, constraints, result.break_status); result.did_fast_forward = did_fast_forward; return result; } result.did_fast_forward = did_fast_forward; switch (current_step.action) { case TSTEP_DETERMINISTIC_SIGNAL: case TSTEP_PROGRAM_ASYNC_SIGNAL_INTERRUPT: if (trace_frame.event().type() != EV_SEGV_RDTSC) { result.break_status.signal = current_step.target.signo; } if (constraints.is_singlestep()) { result.break_status.singlestep_complete = true; } break; case TSTEP_DELIVER_SIGNAL: // When we deliver a terminating signal, do not let the singlestep // complete; proceed on to report our synthetic SIGKILL or task death. if (constraints.is_singlestep() && !(trace_frame.event().type() == EV_SIGNAL_DELIVERY && is_fatal_default_action(current_step.target.signo))) { result.break_status.singlestep_complete = true; } break; case TSTEP_EXIT_TASK: t = result.break_status.task = nullptr; assert(!result.break_status.any_break()); break; case TSTEP_ENTER_SYSCALL: cpuid_bug_detector.notify_reached_syscall_during_replay(t); break; default: break; } if (t) { const Event& ev = trace_frame.event(); if (can_validate() && ev.is_syscall_event() && ::Flags::get().check_cached_mmaps) { t->vm()->verify(t); } if (has_deterministic_ticks(ev, current_step)) { check_ticks_consistency(t, ev); } debug_memory(t); check_for_watchpoint_changes(t, result.break_status); check_approaching_ticks_target(t, constraints, result.break_status); } // Advance to next trace frame before doing rep_after_enter_syscall, // so that FdTable notifications run with the same trace timestamp during // replay as during recording advance_to_next_trace_frame(constraints.stop_at_time); if (TSTEP_ENTER_SYSCALL == current_step.action) { // Advance to next trace frame before we call rep_after_enter_syscall, // since that matches what we do during recording and it matters for // reporting event numbers on stdio. rep_after_enter_syscall(t, current_step.syscall.number); } // Record that this step completed successfully. current_step.action = TSTEP_NONE; Task* next_task = current_task(); if (next_task && !next_task->vm()->first_run_event() && can_validate()) { next_task->vm()->set_first_run_event(trace_frame.time()); } if (next_task) { ticks_at_start_of_event = next_task->tick_count(); } return result; } rr-4.1.0/src/ReplaySession.h000066400000000000000000000256531265436462100157230ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_REPLAY_SESSION_H_ #define RR_REPLAY_SESSION_H_ #include <memory> #include "CPUIDBugDetector.h" #include "DiversionSession.h" #include "EmuFs.h" #include "Session.h" struct syscallbuf_hdr; /** * ReplayFlushBufferedSyscallState is saved in Session and cloned with its * Session, so it needs to be simple data, i.e. not holding pointers to * per-Session data. */ struct ReplayFlushBufferedSyscallState { /* An internal breakpoint is set at this address */ uintptr_t stop_breakpoint_addr; }; /** * Describes the next step to be taken in order to replay a trace * frame. */ enum ReplayTraceStepType { TSTEP_NONE, /* Enter/exit a syscall. |syscall| describes what should be * done at entry/exit. */ TSTEP_ENTER_SYSCALL, TSTEP_EXIT_SYSCALL, /* Advance to the deterministic signal |signo|.
*/ TSTEP_DETERMINISTIC_SIGNAL, /* Advance until |target.ticks| have been retired and then * |target.ip| is reached. */ TSTEP_PROGRAM_ASYNC_SIGNAL_INTERRUPT, /* Deliver signal |signo|. */ TSTEP_DELIVER_SIGNAL, /* Replay the upcoming buffered syscalls. |flush| tracks the * replay state.*/ TSTEP_FLUSH_SYSCALLBUF, /* Replay until we enter the next syscall, then patch it. */ TSTEP_PATCH_SYSCALL, /* Exit the task */ TSTEP_EXIT_TASK, /* Frame has been replayed, done. */ TSTEP_RETIRE, }; /** * rep_trace_step is saved in Session and cloned with its Session, so it needs * to be simple data, i.e. not holding pointers to per-Session data. */ struct ReplayTraceStep { ReplayTraceStepType action; union { struct { /* The syscall number we expect to * enter/exit. */ int number; } syscall; struct { Ticks ticks; int signo; } target; ReplayFlushBufferedSyscallState flush; }; }; enum ReplayStatus { // Some execution was replayed. replay_step() can be called again. REPLAY_CONTINUE, // All tracees are dead. replay_step() should not be called again. REPLAY_EXITED }; struct ReplayResult { ReplayResult(ReplayStatus status = REPLAY_CONTINUE) : status(status), did_fast_forward(false) {} ReplayStatus status; BreakStatus break_status; // True if we did a fast-forward operation, in which case // break_status.singlestep_complete might indicate the completion of more // than one instruction. bool did_fast_forward; }; /** * An indicator of how much progress the ReplaySession has made within a given * (TraceFrame::Time, Ticks) pair. These can only be used for comparisons, to * check whether two ReplaySessions are in the same state and to help * order their states temporally. */ class ReplayStepKey { public: /** * Construct the "none" key; this value is before or equal to every other * key value. */ ReplayStepKey() : action(TSTEP_NONE) {} explicit ReplayStepKey(ReplayTraceStepType action) : action(action) {} bool operator==(const ReplayStepKey& other) const { return action == other.action; } bool operator<(const ReplayStepKey& other) const { return action < other.action; } bool in_execution() const { return action != TSTEP_NONE; } int as_int() const { return (int)action; } private: ReplayTraceStepType action; }; /** Encapsulates additional session state related to replay. */ class ReplaySession : public Session { public: typedef std::shared_ptr<ReplaySession> shr_ptr; ~ReplaySession(); /** * Return a semantic copy of all the state managed by this, * that is the entire tracee tree and the state it depends on. * Any mutations of the returned Session can't affect the * state of this, and vice versa. * * This operation is also called "checkpointing" the replay * session. * * The returned clone is only partially initialized. This uses less * system resources than a fully-initialized session, so if you're going * to keep a session around inactive, keep the clone and not the original * session. Partially initialized sessions automatically finish * initializing when necessary. */ shr_ptr clone(); /** * Return true if we're in a state where it's OK to clone. For example, * we can't clone in some syscalls. */ bool can_clone(); /** * Like |clone()|, but return a session in "diversion" mode, * which allows free execution. */ DiversionSession::shr_ptr clone_diversion(); EmuFs& emufs() const { return *emu_fs; } /** Collect garbage files from this session's emufs.
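 * Called when something that might hold the last reference to an * emulated file goes away, e.g. after a task exit in exit_task().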
*/ void gc_emufs(); /** Run emufs gc if this syscall may release a file */ void maybe_gc_emufs(SupportedArch arch, int syscallno); TraceReader& trace_reader() { return trace_in; } const TraceReader& trace_reader() const { return trace_in; } /** * The trace record that we are working on --- the next event * for replay to reach. */ const TraceFrame& current_trace_frame() const { return trace_frame; } /** * The Task for the current trace record. */ Task* current_task() { finish_initializing(); return find_task(trace_frame.tid()); } /** * Returns true if the next step for this session is to exit a syscall with * the given number. */ bool next_step_is_syscall_exit(int syscallno); /** * The current ReplayStepKey. */ ReplayStepKey current_step_key() const { return ReplayStepKey(current_step.action); } Ticks ticks_at_start_of_current_event() const { return ticks_at_start_of_event; } /** * Create a replay session that will use the trace directory specified * by 'dir', or the latest trace if 'dir' is not supplied. */ static shr_ptr create(const std::string& dir); struct StepConstraints { explicit StepConstraints(RunCommand command) : command(command), stop_at_time(0), ticks_target(0) {} RunCommand command; TraceFrame::Time stop_at_time; Ticks ticks_target; // When the RunCommand is RUN_SINGLESTEP_FAST_FORWARD, stop if the next // singlestep would enter one of the register states in this list. // RUN_SINGLESTEP_FAST_FORWARD will always singlestep at least once // regardless. std::vector<const Registers*> stop_before_states; bool is_singlestep() const { return command == RUN_SINGLESTEP || command == RUN_SINGLESTEP_FAST_FORWARD; } }; /** * Take a single replay step. * Ensure we stop at event stop_at_time. If this is not specified, * optimizations may cause a replay_step to pass straight through * stop_at_time. * Outside of replay_step, no internal breakpoints will be set for any * task in this session. * Stop when the current event reaches stop_at_time (i.e. this event * is the next event to be replayed). * If ticks_target is nonzero, stop before the current task's ticks * reaches ticks_target (but not too far before, unless we hit a breakpoint * or stop_at_time). Only useful for RUN_CONTINUE. * Always stops on a switch to a new task. */ ReplayResult replay_step(const StepConstraints& constraints); ReplayResult replay_step(RunCommand command) { return replay_step(StepConstraints(command)); } virtual ReplaySession* as_replay() { return this; } /** * Return true if |sig| is a signal that may be generated during * replay but should be ignored. For example, SIGCHLD can be * delivered at almost any point during replay when tasks exit, but it's * not part of the recording and shouldn't be delivered. * * TODO: can we do some clever sigprocmask'ing to avoid pending * signals altogether?
*/ static bool is_ignored_signal(int sig); struct Flags { Flags() : redirect_stdio(false) {} Flags(const Flags& other) = default; bool redirect_stdio; }; bool redirect_stdio() { return flags.redirect_stdio; } void set_flags(const Flags& flags) { this->flags = flags; } private: ReplaySession(const std::string& dir) : emu_fs(EmuFs::create()), trace_in(dir), trace_frame(), current_step(), ticks_at_start_of_event(0) { advance_to_next_trace_frame(0); } ReplaySession(const ReplaySession& other) : Session(other), emu_fs(other.emu_fs->clone()), trace_in(other.trace_in), trace_frame(other.trace_frame), current_step(other.current_step), ticks_at_start_of_event(other.ticks_at_start_of_event), cpuid_bug_detector(other.cpuid_bug_detector), flags(other.flags) {} void setup_replay_one_trace_frame(Task* t); void advance_to_next_trace_frame(TraceFrame::Time stop_at_time); Completion emulate_signal_delivery(Task* oldtask, int sig, const StepConstraints& constraints); Completion try_one_trace_step(Task* t, const StepConstraints& step_constraints); Completion cont_syscall_boundary(Task* t, const StepConstraints& constraints); Completion enter_syscall(Task* t, const StepConstraints& constraints); Completion exit_syscall(Task* t, const StepConstraints& constraints); Completion exit_task(Task* t, const StepConstraints& constraints); void check_ticks_consistency(Task* t, const Event& ev); void check_pending_sig(Task* t); void continue_or_step(Task* t, const StepConstraints& constraints, TicksRequest tick_request, ResumeRequest resume_how = RESUME_SYSCALL); enum ExecStateType { UNKNOWN, NOT_AT_TARGET, AT_TARGET }; TrapType compute_trap_type(Task* t, int target_sig, SignalDeterministic deterministic, ExecStateType exec_state, const StepConstraints& constraints); bool is_debugger_trap(Task* t, int target_sig, SignalDeterministic deterministic, ExecStateType exec_state, const StepConstraints& constraints); Completion advance_to(Task* t, const Registers& regs, int sig, const StepConstraints& constraints, Ticks ticks); Completion advance_to_ticks_target(Task* t, const StepConstraints& constraints); Completion emulate_deterministic_signal(Task* t, int sig, const StepConstraints& constraints); Completion emulate_async_signal(Task* t, const StepConstraints& constraints, Ticks ticks); void prepare_syscallbuf_records(Task* t); Completion flush_syscallbuf(Task* t, const StepConstraints& constraints); Completion patch_next_syscall(Task* t, const StepConstraints& constraints); void check_approaching_ticks_target(Task* t, const StepConstraints& constraints, BreakStatus& break_status); std::shared_ptr<EmuFs> emu_fs; Task* last_debugged_task; TraceReader trace_in; TraceFrame trace_frame; ReplayTraceStep current_step; Ticks ticks_at_start_of_event; CPUIDBugDetector cpuid_bug_detector; Flags flags; bool did_fast_forward; }; #endif // RR_REPLAY_SESSION_H_ rr-4.1.0/src/ReplayTimeline.cc000066400000000000000000001550061265436462100162000ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "ReplayTimeline" #include "ReplayTimeline.h" #include <math.h> #include "fast_forward.h" #include "log.h" using namespace rr; using namespace std; ReplayTimeline::InternalMark::~InternalMark() { if (owner && checkpoint) { owner->remove_mark_with_checkpoint(key); } } ostream& operator<<(ostream& s, const ReplayTimeline::MarkKey& o) { return s << "time:" << o.trace_time << ",ticks:" << o.ticks << ",st:" << o.step_key.as_int(); } ostream& operator<<(ostream& s, const
ReplayTimeline::InternalMark& o) { return s << "{" << o.key << ",regs_ip:" << o.regs.ip() << "}"; } ostream& operator<<(ostream& s, const ReplayTimeline::Mark& o) { if (!o.ptr) { return s << "{null}"; } return s << *o.ptr.get(); } ostream& operator<<(ostream& s, const ReplayTimeline::ProtoMark& o) { return s << "{" << o.key << ",regs_ip:" << o.regs.ip() << "}"; } bool ReplayTimeline::less_than(const Mark& m1, const Mark& m2) { assert(m1.ptr->owner == m2.ptr->owner); if (m1.ptr->key < m2.ptr->key) { return true; } if (m2.ptr->key < m1.ptr->key) { return false; } if (!m1.ptr->owner) { return false; } for (shared_ptr<InternalMark>& m : m1.ptr->owner->marks[m1.ptr->key]) { if (m == m2.ptr) { return false; } if (m == m1.ptr) { return true; } } assert(0 && "Marks missing from vector, invariants broken!"); return false; } ReplayTimeline::ReplayTimeline(std::shared_ptr<ReplaySession> session, const ReplaySession::Flags& session_flags) : session_flags(session_flags), current(std::move(session)), breakpoints_applied(false), reverse_execution_barrier_event(0) { current->set_visible_execution(false); current->set_flags(session_flags); } ReplayTimeline::~ReplayTimeline() { for (auto it : marks) { for (shared_ptr<InternalMark>& itv : it.second) { itv->owner = nullptr; itv->checkpoint = nullptr; } } } static bool equal_regs(const Registers& r1, const Registers& r2) { // Compare ip()s first since they will usually fail to match, especially // when we're comparing InternalMarks with the same MarkKey return r1.ip() == r2.ip() && r1.matches(r2); } bool ReplayTimeline::InternalMark::equal_states(ReplaySession& session) const { if (session_mark_key(session) != key) { return false; } Task* t = session.current_task(); return equal_regs(regs, t->regs()) && return_addresses == t->return_addresses(); } bool ReplayTimeline::ProtoMark::equal_states(ReplaySession& session) const { if (session_mark_key(session) != key) { return false; } Task* t = session.current_task(); return equal_regs(regs, t->regs()) && return_addresses == t->return_addresses(); } ReplayTimeline::ProtoMark ReplayTimeline::proto_mark() const { return ProtoMark(current_mark_key(), current->current_task()); } shared_ptr<ReplayTimeline::InternalMark> ReplayTimeline::current_mark() { auto it = marks.find(current_mark_key()); // Avoid creating an entry in 'marks' if it doesn't already exist if (it != marks.end()) { for (shared_ptr<InternalMark>& m : it->second) { if (m->equal_states(*current)) { return m; } } } return shared_ptr<InternalMark>(); } ReplayTimeline::Mark ReplayTimeline::mark() { Mark result; auto cm = current_mark(); if (cm) { swap(cm, result.ptr); return result; } MarkKey key = current_mark_key(); shared_ptr<InternalMark> m = make_shared<InternalMark>(this, *current, key); auto& mark_vector = marks[key]; if (mark_vector.empty()) { mark_vector.push_back(m); } else if (mark_vector[mark_vector.size() - 1] == current_at_or_after_mark) { mark_vector.push_back(m); } else { // Now the hard part: figuring out where to put it in the list of existing // marks. unapply_breakpoints_and_watchpoints(); ReplaySession::shr_ptr tmp_session = current->clone(); vector<shared_ptr<InternalMark> >::iterator mark_index = mark_vector.end(); // We could set breakpoints at the marks and then continue with an // interrupt set to fire when our tick-count increases. But that requires // new replay functionality (probably a new RunCommand), so for now, do the // simplest thing and just single-step until we find where to put the new // mark(s).
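// Worst case this scan singlesteps once per instruction between the // existing marks for this key; the note below about intermediate marks // amortizes that cost for callers that request a Mark after every step.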
vector<shared_ptr<InternalMark> > new_marks; new_marks.push_back(m); LOG(debug) << "mark() replaying to find mark location"; // Allow coalescing of multiple repetitions of a single x86 string // instruction (as long as we don't reach one of our mark_vector states). ReplaySession::StepConstraints constraints(RUN_SINGLESTEP_FAST_FORWARD); for (auto& mv : mark_vector) { constraints.stop_before_states.push_back(&mv->regs); } while (true) { auto result = tmp_session->replay_step(constraints); if (session_mark_key(*tmp_session) != key || result.status != REPLAY_CONTINUE) { break; } if (!result.break_status.singlestep_complete) { continue; } for (auto it = mark_vector.begin(); it != mark_vector.end(); ++it) { shared_ptr<InternalMark>& existing_mark = *it; if (existing_mark->equal_states(*tmp_session)) { if (!result.did_fast_forward && !result.break_status.signal) { new_marks.back()->singlestep_to_next_mark_no_signal = true; } mark_index = it; break; } } if (mark_index != mark_vector.end()) { break; } // Some callers singlestep through N instructions, all with the same // MarkKey, requesting a Mark after each step. If there's a Mark at the // end of the N instructions, this could mean N(N+1)/2 singlestep // operations total. To avoid that, add all the intermediate states to // the mark map now, so the first mark() call will perform N singlesteps // and the rest will perform none. if (!result.did_fast_forward && !result.break_status.signal) { new_marks.back()->singlestep_to_next_mark_no_signal = true; } new_marks.push_back(make_shared<InternalMark>(this, *tmp_session, key)); } LOG(debug) << "Mark location found"; // mark_index is the current index of the next mark after 'current'. So // insert our new marks at mark_index. mark_vector.insert(mark_index, new_marks.begin(), new_marks.end()); } swap(m, result.ptr); current_at_or_after_mark = result.ptr; return result; } void ReplayTimeline::mark_after_singlestep(const Mark& from, const ReplayResult& result) { Mark m = mark(); if (!result.did_fast_forward && m.ptr->key == from.ptr->key && !result.break_status.signal) { auto& mark_vector = marks[m.ptr->key]; for (size_t i = 0; i < mark_vector.size(); ++i) { if (mark_vector[i] == from.ptr) { assert(i + 1 < mark_vector.size() && mark_vector[i + 1] == m.ptr); break; } } from.ptr->singlestep_to_next_mark_no_signal = true; } } ReplayTimeline::Mark ReplayTimeline::find_singlestep_before(const Mark& mark) { auto& mark_vector = marks[mark.ptr->key]; ssize_t i; for (i = mark_vector.size() - 1; i >= 0; --i) { if (mark_vector[i] == mark.ptr) { break; } } assert(i >= 0 && "Mark not in vector???"); Mark m; if (i == 0) { return m; } if (!mark_vector[i - 1]->singlestep_to_next_mark_no_signal) { return m; } m.ptr = mark_vector[i - 1]; return m; } ReplayTimeline::Mark ReplayTimeline::lazy_reverse_singlestep(const Mark& from, Task* t) { if (!no_watchpoints_hit_interval_start || !no_watchpoints_hit_interval_end) { return Mark(); } Mark m = find_singlestep_before(from); if (m && m >= no_watchpoints_hit_interval_start && m < no_watchpoints_hit_interval_end && !has_breakpoint_at_address(t, from.ptr->regs.ip())) { return m; } return Mark(); } ReplayTimeline::Mark ReplayTimeline::add_explicit_checkpoint() { assert(current->can_clone()); Mark m = mark(); if (!m.ptr->checkpoint) { unapply_breakpoints_and_watchpoints(); m.ptr->checkpoint = current->clone(); auto key = m.ptr->key; if (marks_with_checkpoints.find(key) == marks_with_checkpoints.end()) { marks_with_checkpoints[key] = 1; } else { marks_with_checkpoints[key]++; } } ++m.ptr->checkpoint_refcount; return m; } void
ReplayTimeline::remove_mark_with_checkpoint(const MarkKey& key) { assert(marks_with_checkpoints[key] > 0); if (--marks_with_checkpoints[key] == 0) { marks_with_checkpoints.erase(key); } } void ReplayTimeline::remove_explicit_checkpoint(const Mark& mark) { assert(mark.ptr->checkpoint_refcount > 0); if (--mark.ptr->checkpoint_refcount == 0) { mark.ptr->checkpoint = nullptr; remove_mark_with_checkpoint(mark.ptr->key); } } void ReplayTimeline::seek_to_before_key(const MarkKey& key) { auto it = marks_with_checkpoints.lower_bound(key); // 'it' points to the first value equivalent to or greater than 'key'. auto current_key = current_mark_key(); if (it == marks_with_checkpoints.begin()) { if (current_key < key) { // We can use the current session, so do nothing. } else { // nowhere earlier to go, so restart from beginning. current = ReplaySession::create(current->trace_reader().dir()); breakpoints_applied = false; current_at_or_after_mark = nullptr; current->set_flags(session_flags); } } else { --it; // 'it' is now at the last checkpoint before 'key' if (it->first < current_key && current_key < key) { // Current state is closer to the destination than any checkpoint we // have, so do nothing. } else { // Return one of the checkpoints at *it. current = nullptr; for (auto mark_it : marks[it->first]) { shared_ptr<InternalMark> m(mark_it); if (m->checkpoint) { current = m->checkpoint->clone(); // At this point, m->checkpoint is fully initialized but current // is not. Swap them so that m->checkpoint is not fully // initialized, to reduce resource usage. swap(current, m->checkpoint); break; } } assert(current); breakpoints_applied = false; current_at_or_after_mark = nullptr; } } } void ReplayTimeline::seek_up_to_mark(const Mark& mark) { if (current_mark_key() == mark.ptr->key) { Mark cm = this->mark(); if (cm <= mark) { // close enough, stay where we are return; } } // Check if any of the marks with the same key as 'mark', but not after // 'mark', are usable. auto& mark_vector = marks[mark.ptr->key]; bool at_or_before_mark = false; for (ssize_t i = mark_vector.size() - 1; i >= 0; --i) { auto& m = mark_vector[i]; if (m == mark.ptr) { at_or_before_mark = true; } if (at_or_before_mark && m->checkpoint) { current = m->checkpoint->clone(); // At this point, m->checkpoint is fully initialized but current // is not. Swap them so that m->checkpoint is not fully // initialized, to reduce resource usage. swap(current, m->checkpoint); breakpoints_applied = false; current_at_or_after_mark = m; return; } } return seek_to_before_key(mark.ptr->key); } ReplaySession::StepConstraints ReplayTimeline::ReplayStepToMarkStrategy::setup_step_constraints() { ReplaySession::StepConstraints constraints(RUN_CONTINUE); if (singlesteps_to_perform > 0) { constraints.command = RUN_SINGLESTEP_FAST_FORWARD; --singlesteps_to_perform; } return constraints; } void ReplayTimeline::update_strategy_and_fix_watchpoint_quirk( ReplayStepToMarkStrategy& strategy, const ReplaySession::StepConstraints& constraints, ReplayResult& result, const ProtoMark& before) { if (constraints.command == RUN_CONTINUE && fix_watchpoint_coalescing_quirk(result, before)) { // It's quite common for x86 string instructions to trigger the same // watchpoint several times in consecutive instructions, e.g. if we're // doing a "rep movsb" over an 8-byte watchpoint. 8 invocations of // fix_watchpoint_coalescing_quirk could require 8 replays from some // previous checkpoint. To avoid that, after // fix_watchpoint_coalescing_quirk has fired once, singlestep the // next 7 times.
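// (7 covers the remaining iterations of the example above: the first of // the 8 watchpoint hits was just handled by the expensive path.)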
strategy.singlesteps_to_perform = 7; } } ReplayResult ReplayTimeline::replay_step_to_mark( const Mark& mark, ReplayStepToMarkStrategy& strategy) { ProtoMark before = proto_mark(); ReplayResult result; if (current->trace_reader().time() < mark.ptr->key.trace_time) { // Easy case: each RUN_CONTINUE can only advance by at most one // trace event, so do one. But do a singlestep if our strategy suggests // we should. ReplaySession::StepConstraints constraints = strategy.setup_step_constraints(); constraints.stop_at_time = mark.ptr->key.trace_time; result = current->replay_step(constraints); update_strategy_and_fix_watchpoint_quirk(strategy, constraints, result, before); return result; } Task* t = current->current_task(); ASSERT(t, current->trace_reader().time() == mark.ptr->key.trace_time); // t must remain valid through here since t can only die when we complete // an event, and we're not going to complete another event before // reaching the mark ... apart from where we call // fix_watchpoint_coalescing_quirk. if (t->tick_count() < mark.ptr->key.ticks) { // Try to make progress by just continuing with a ticks constraint // set to stop us before the mark. This is efficient in the worst case, // when we must execute lots of instructions to reach the mark. ReplaySession::StepConstraints constraints = strategy.setup_step_constraints(); constraints.ticks_target = mark.ptr->key.ticks - 1; result = current->replay_step(constraints); bool approaching_ticks_target = result.break_status.approaching_ticks_target; result.break_status.approaching_ticks_target = false; // We can't be at the mark yet. ASSERT(t, t->tick_count() < mark.ptr->key.ticks); // If there's a break indicated, we should return that to the // caller without doing any more work if (!approaching_ticks_target || result.break_status.any_break()) { update_strategy_and_fix_watchpoint_quirk(strategy, constraints, result, before); return result; } // We may not have made any progress so we'll need to try another strategy } remote_code_ptr mark_addr = mark.ptr->regs.ip(); // Try adding a breakpoint at the required IP and running to it. // We can't do this if we're currently at the IP, since we'd make no progress. // Setting the breakpoint may fail; the mark address may be in invalid // memory, e.g. because it's at the delivery of a SIGSEGV for a bad IP. if (t->regs().ip() != mark_addr && t->vm()->add_breakpoint(mark_addr, TRAP_BKPT_USER)) { ReplaySession::StepConstraints constraints = strategy.setup_step_constraints(); result = current->replay_step(constraints); t->vm()->remove_breakpoint(mark_addr, TRAP_BKPT_USER); // If we hit our breakpoint and there is no client breakpoint there, // pretend we didn't hit it. if (result.break_status.breakpoint_hit && !has_breakpoint_at_address(result.break_status.task, result.break_status.task->ip())) { result.break_status.breakpoint_hit = false; } update_strategy_and_fix_watchpoint_quirk(strategy, constraints, result, before); return result; } // At required IP, but not in the correct state. Singlestep over this IP. // We need the FAST_FORWARD option in case the mark state occurs after // many iterations of a string instruction at this address. ReplaySession::StepConstraints constraints(RUN_SINGLESTEP_FAST_FORWARD); // We don't want to fast-forward past the mark state, so give the mark // state as a state we should stop before. 
FAST_FORWARD always does at // least one singlestep so one call to replay_step_to_mark will fast-forward // to the state before the mark and return, then the next call to // replay_step_to_mark will singlestep into the mark state. constraints.stop_before_states.push_back(&mark.ptr->regs); result = current->replay_step(constraints); // Hide internal singlestep but preserve other break statuses result.break_status.singlestep_complete = false; return result; } void ReplayTimeline::seek_to_proto_mark(const ProtoMark& pmark) { seek_to_before_key(pmark.key); unapply_breakpoints_and_watchpoints(); while (!pmark.equal_states(*current)) { if (current->trace_reader().time() < pmark.key.trace_time) { ReplaySession::StepConstraints constraints(RUN_CONTINUE); constraints.stop_at_time = pmark.key.trace_time; current->replay_step(constraints); } else { Task* t = current->current_task(); remote_code_ptr mark_addr = pmark.regs.ip(); if (t->regs().ip() == mark_addr && current->current_step_key().in_execution()) { // At required IP, but not in the correct state. Singlestep over // this IP. ReplaySession::StepConstraints constraints(RUN_SINGLESTEP_FAST_FORWARD); constraints.stop_before_states.push_back(&pmark.regs); current->replay_step(constraints); } else { // Get a shared reference to t->vm() in case t dies during replay_step shared_ptr<AddressSpace> vm = t->vm(); vm->add_breakpoint(mark_addr, TRAP_BKPT_USER); current->replay_step(RUN_CONTINUE); vm->remove_breakpoint(mark_addr, TRAP_BKPT_USER); } } } } void ReplayTimeline::seek_to_mark(const Mark& mark) { seek_up_to_mark(mark); while (current_mark() != mark.ptr) { unapply_breakpoints_and_watchpoints(); ReplayStepToMarkStrategy strategy; replay_step_to_mark(mark, strategy); } current_at_or_after_mark = mark.ptr; // XXX handle cases where breakpoints can't yet be applied } /** * Intel CPUs (and maybe others) coalesce iterations of REP-prefixed string * instructions so that a watchpoint on a byte at location L can fire after * the iteration that writes byte L+63 (or possibly more?). * This causes problems for rr since this coalescing doesn't happen when we * single-step. * This function is called after doing a ReplaySession::replay_step with * command == RUN_CONTINUE. RUN_SINGLESTEP and RUN_SINGLESTEP_FAST_FORWARD * disable this coalescing (the latter, because it's aware of watchpoints * and single-steps when it gets too close to them). * |before| is the state before we did the replay_step. * If a watchpoint fired, and it looks like it could have fired during a * string instruction, we'll backup to |before| and replay forward, stopping * before the breakpoint could fire and single-stepping to make sure the * coalescing quirk doesn't happen. * Returns true if we might have fixed something. */ bool ReplayTimeline::fix_watchpoint_coalescing_quirk(ReplayResult& result, const ProtoMark& before) { if (result.status == REPLAY_EXITED || result.break_status.watchpoints_hit.empty()) { // no watchpoint hit. Nothing to fix. return false; } if (!maybe_at_or_after_x86_string_instruction(result.break_status.task)) { return false; } TaskUid after_tuid = result.break_status.task->tuid(); Ticks after_ticks = result.break_status.task->tick_count(); LOG(debug) << "Fixing x86-string coalescing quirk from " << before << " to " << proto_mark() << " (final cx " << result.break_status.task->regs().cx() << ")"; seek_to_proto_mark(before); // Keep going until the watchpoint fires. It will either fire early, or at // the same time as some other break.
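// The loop below runs RUN_CONTINUE in ticks-bounded chunks while we're far // from after_ticks, then switches to RUN_SINGLESTEP_FAST_FORWARD for the // final approach, since the singlestep paths never coalesce watchpoint hits.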
apply_breakpoints_and_watchpoints(); bool approaching_ticks_target = false; while (true) { Task* t = current->current_task(); if (t->tuid() == after_tuid) { if (approaching_ticks_target) { // We don't need to set any stop_before_states here. // RUN_SINGLESTEP_FAST_FORWARD always avoids the coalescing quirk, so // if a watchpoint is triggered by the string instruction at // string_instruction_ip, it will have the correct timing. result = current->replay_step(RUN_SINGLESTEP_FAST_FORWARD); if (!result.break_status.watchpoints_hit.empty()) { LOG(debug) << "Fixed x86-string coalescing quirk; now at " << current_mark_key() << " (new cx " << result.break_status.task->regs().cx() << ")"; break; } } else { ReplaySession::StepConstraints constraints(RUN_CONTINUE); constraints.ticks_target = after_ticks - 1; result = current->replay_step(constraints); approaching_ticks_target = result.break_status.approaching_ticks_target; } ASSERT(t, t->tick_count() <= after_ticks) << "We went too far!"; } else { current->replay_step(RUN_CONTINUE); } } return true; } bool ReplayTimeline::add_breakpoint( Task* t, remote_code_ptr addr, std::unique_ptr<BreakpointCondition> condition) { if (has_breakpoint_at_address(t, addr)) { remove_breakpoint(t, addr); } // Apply breakpoints now; we need to actually try adding this breakpoint // to see if it works. apply_breakpoints_and_watchpoints(); if (!t->vm()->add_breakpoint(addr, TRAP_BKPT_USER)) { return false; } breakpoints.insert(make_tuple(t->vm()->uid(), addr, move(condition))); return true; } void ReplayTimeline::remove_breakpoint(Task* t, remote_code_ptr addr) { if (breakpoints_applied) { t->vm()->remove_breakpoint(addr, TRAP_BKPT_USER); } ASSERT(t, has_breakpoint_at_address(t, addr)); auto it = breakpoints.lower_bound(make_tuple(t->vm()->uid(), addr, nullptr)); breakpoints.erase(it); } bool ReplayTimeline::has_breakpoint_at_address(Task* t, remote_code_ptr addr) { auto it = breakpoints.lower_bound(make_tuple(t->vm()->uid(), addr, nullptr)); return it != breakpoints.end() && get<0>(*it) == t->vm()->uid() && get<1>(*it) == addr; } bool ReplayTimeline::add_watchpoint(Task* t, remote_ptr<void> addr, size_t num_bytes, WatchType type, unique_ptr<BreakpointCondition> condition) { if (has_watchpoint_at_address(t, addr, num_bytes, type)) { remove_watchpoint(t, addr, num_bytes, type); } // Apply breakpoints now; we need to actually try adding this breakpoint // to see if it works.
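// (Same pattern as add_breakpoint above: if the underlying add fails, we // report failure to the caller rather than track an unapplied watchpoint.)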
apply_breakpoints_and_watchpoints(); if (!t->vm()->add_watchpoint(addr, num_bytes, type)) { return false; } watchpoints.insert( make_tuple(t->vm()->uid(), addr, num_bytes, type, move(condition))); no_watchpoints_hit_interval_start = no_watchpoints_hit_interval_end = Mark(); return true; } void ReplayTimeline::remove_watchpoint(Task* t, remote_ptr<void> addr, size_t num_bytes, WatchType type) { if (breakpoints_applied) { t->vm()->remove_watchpoint(addr, num_bytes, type); } ASSERT(t, has_watchpoint_at_address(t, addr, num_bytes, type)); auto it = watchpoints.lower_bound( make_tuple(t->vm()->uid(), addr, num_bytes, type, nullptr)); watchpoints.erase(it); } bool ReplayTimeline::has_watchpoint_at_address(Task* t, remote_ptr<void> addr, size_t num_bytes, WatchType type) { auto it = watchpoints.lower_bound( make_tuple(t->vm()->uid(), addr, num_bytes, type, nullptr)); return it != watchpoints.end() && get<0>(*it) == t->vm()->uid() && get<1>(*it) == addr && get<2>(*it) == num_bytes && get<3>(*it) == type; } void ReplayTimeline::remove_breakpoints_and_watchpoints() { unapply_breakpoints_and_watchpoints(); breakpoints.clear(); watchpoints.clear(); } void ReplayTimeline::apply_breakpoints_and_watchpoints() { if (breakpoints_applied) { return; } breakpoints_applied = true; for (auto& bp : breakpoints) { AddressSpace* vm = current->find_address_space(get<0>(bp)); // XXX handle cases where we can't apply a breakpoint right now. Later // during replay the address space might be created (or new mappings might // be created) and we should reapply breakpoints then. if (vm) { vm->add_breakpoint(get<1>(bp), TRAP_BKPT_USER); } } for (auto& wp : watchpoints) { AddressSpace* vm = current->find_address_space(get<0>(wp)); // XXX handle cases where we can't apply a watchpoint right now. Later // during replay the address space might be created (or new mappings might // be created) and we should reapply watchpoints then. // XXX we could make this more efficient by providing a method to set // several watchpoints at once on a given AddressSpace. if (vm) { vm->add_watchpoint(get<1>(wp), get<2>(wp), get<3>(wp)); } } } void ReplayTimeline::unapply_breakpoints_and_watchpoints() { if (!breakpoints_applied) { return; } breakpoints_applied = false; for (auto& vm : current->vms()) { vm->remove_all_breakpoints(); vm->remove_all_watchpoints(); } } ReplayResult ReplayTimeline::singlestep_with_breakpoints_disabled() { apply_breakpoints_and_watchpoints(); for (auto& vm : current->vms()) { vm->remove_all_breakpoints(); } auto result = current->replay_step(RUN_SINGLESTEP); for (auto& bp : breakpoints) { AddressSpace* vm = current->find_address_space(get<0>(bp)); if (vm) { vm->add_breakpoint(get<1>(bp), TRAP_BKPT_USER); } } return result; } bool ReplayTimeline::is_start_of_reverse_execution_barrier_event() { if (current->trace_reader().time() != reverse_execution_barrier_event || current->current_step_key().in_execution()) { return false; } LOG(debug) << "Found reverse execution barrier at " << mark(); return true; } bool ReplayTimeline::run_forward_to_intermediate_point(const Mark& end, ForceProgress force) { unapply_breakpoints_and_watchpoints(); LOG(debug) << "Trying to find intermediate point between " << current_mark_key() << " and " << end << (force == FORCE_PROGRESS ?
" (forced)" : ""); TraceFrame::Time now = current->trace_reader().time(); TraceFrame::Time mid = (now + end.ptr->key.trace_time) / 2; if (now < mid && mid < end.ptr->key.trace_time) { ReplaySession::StepConstraints constraints(RUN_CONTINUE); constraints.stop_at_time = mid; while (current->trace_reader().time() < mid) { current->replay_step(constraints); } assert(current->trace_reader().time() == mid); LOG(debug) << "Ran forward to mid event " << current_mark_key(); return true; } if (current->trace_reader().time() < end.ptr->key.trace_time && end.ptr->ticks_at_event_start < end.ptr->key.ticks) { ReplaySession::StepConstraints constraints(RUN_CONTINUE); constraints.stop_at_time = end.ptr->key.trace_time; while (current->trace_reader().time() < end.ptr->key.trace_time) { current->replay_step(constraints); } assert(current->trace_reader().time() == end.ptr->key.trace_time); LOG(debug) << "Ran forward to event " << current_mark_key(); return true; } Task* t = current->current_task(); if (t) { Ticks start_ticks = t->tick_count(); Ticks end_ticks = current->current_trace_frame().ticks(); if (end.ptr->key.trace_time == current->trace_reader().time()) { end_ticks = min(end_ticks, end.ptr->key.ticks); } ASSERT(t, start_ticks <= end_ticks); Ticks target = min(end_ticks, (start_ticks + end_ticks) / 2); ReplaySession::StepConstraints constraints(RUN_CONTINUE); constraints.ticks_target = target; ProtoMark m = proto_mark(); ReplayResult result = current->replay_step(constraints); if (m.equal_states(*current)) { assert(result.break_status.approaching_ticks_target); assert(t->tick_count() == start_ticks); // We didn't make any progress that way. // Normally we should just give up now and let reverse_continue keep // running and hitting breakpoints etc since we're pretty close to the // target already and the overhead of what we have to do here otherwise // can be high. But there's a pathological case where reverse_continue // is hitting a breakpoint on each iteration of a string instruction. // If that's happening then we will be told to force progress. if (force == FORCE_PROGRESS) { // Let's try a fast-forward singlestep to jump over an x86 string // instruction that may be triggering a lot of breakpoint hits. Make // sure // we stop before |end|. ReplaySession::shr_ptr tmp_session; if (start_ticks + 1 >= end_ticks) { // This singlestep operation might leave us at |end|, which is not // allowed. So make a backup of the current state. tmp_session = current->clone(); LOG(debug) << "Created backup tmp_session"; } constraints = ReplaySession::StepConstraints(RUN_SINGLESTEP_FAST_FORWARD); constraints.stop_before_states.push_back(&end.ptr->regs); result = current->replay_step(constraints); if (at_mark(end)) { assert(tmp_session); current = move(tmp_session); LOG(debug) << "Singlestepping arrived at |end|, restoring session"; } else if (!m.equal_states(*current)) { LOG(debug) << "Did fast-singlestep forward to " << current_mark_key(); return true; } } } else { while (t->tick_count() < target && !result.break_status.approaching_ticks_target) { result = current->replay_step(constraints); } LOG(debug) << "Ran forward to " << current_mark_key(); return true; } } LOG(debug) << "Made no progress"; return false; } /** * Don't allow more than this number of breakpoint/watchpoint stops * in a given replay interval. If we hit more than this, try to split * the interval in half and replay with watchpoints/breakpoints in the latter * half. 
*/ static const int stop_count_limit = 20; ReplayResult ReplayTimeline::reverse_continue( const std::function<bool(Task*)>& stop_filter, const std::function<bool()>& interrupt_check) { Mark end = mark(); LOG(debug) << "ReplayTimeline::reverse_continue from " << end; bool last_stop_is_watch_or_signal; ReplayResult final_result; TaskUid final_tuid; Ticks final_ticks; Mark dest; vector<Mark> restart_points; while (!dest) { Mark start = mark(); bool checkpoint_at_first_break; if (start >= end) { checkpoint_at_first_break = true; if (restart_points.empty()) { seek_to_before_key(end.ptr->key); start = mark(); if (start >= end) { LOG(debug) << "Couldn't seek to before " << end << ", returning exit"; // Can't go backwards. Call this an exit. final_result.status = REPLAY_EXITED; final_result.break_status = BreakStatus(); return final_result; } LOG(debug) << "Seeked backward from " << end << " to " << start; } else { Mark seek = restart_points.back(); restart_points.pop_back(); seek_to_mark(seek); LOG(debug) << "Seeked directly backward from " << start << " to " << seek; start = move(seek); } } else { checkpoint_at_first_break = false; } maybe_add_reverse_exec_checkpoint(EXPECT_SHORT_REVERSE_EXECUTION); bool at_breakpoint = false; ReplayStepToMarkStrategy strategy; int stop_count = 0; bool made_progress_between_stops = false; remote_code_ptr avoidable_stop_ip; Ticks avoidable_stop_ticks = 0; while (true) { apply_breakpoints_and_watchpoints(); ReplayResult result; if (at_breakpoint) { result = singlestep_with_breakpoints_disabled(); } else { result = replay_step_to_mark(end, strategy); // This will remove all reverse-exec checkpoints ahead of the // current time, and add new ones if necessary. This should be // helpful if we have to reverse-continue far back in time, where // the interval between 'start' and 'end' could be lengthy; we'll // populate the interval with new checkpoints, speeding up // the following seek and possibly future operations. } at_breakpoint = result.break_status.breakpoint_hit; bool avoidable_stop = result.break_status.breakpoint_hit || !result.break_status.watchpoints_hit.empty(); if (avoidable_stop) { made_progress_between_stops = avoidable_stop_ip != result.break_status.task->ip() || avoidable_stop_ticks != result.break_status.task->tick_count(); avoidable_stop_ip = result.break_status.task->ip(); avoidable_stop_ticks = result.break_status.task->tick_count(); } evaluate_conditions(result); if (result.break_status.any_break() && !stop_filter(result.break_status.task)) { result.break_status = BreakStatus(); } maybe_add_reverse_exec_checkpoint(EXPECT_SHORT_REVERSE_EXECUTION); if (checkpoint_at_first_break && dest != start && result.break_status.any_break()) { checkpoint_at_first_break = false; set_short_checkpoint(); } if (!result.break_status.watchpoints_hit.empty() || result.break_status.signal) { dest = mark(); LOG(debug) << "Found " << (result.break_status.signal ? "signal" : "watch") << " break at " << dest; final_result = result; final_tuid = result.break_status.task ? result.break_status.task->tuid() : TaskUid(); final_ticks = result.break_status.task ?
result.break_status.task->tick_count() : 0; last_stop_is_watch_or_signal = true; } assert(result.status == REPLAY_CONTINUE); if (is_start_of_reverse_execution_barrier_event()) { dest = mark(); final_result = result; final_result.break_status.task = current->current_task(); final_result.break_status.task_exit = true; final_tuid = final_result.break_status.task->tuid(); final_ticks = result.break_status.task->tick_count(); last_stop_is_watch_or_signal = false; } if (at_mark(end)) { // In the next iteration, retry from an earlier checkpoint. end = start; break; } // If there is a breakpoint at the current ip() where we start a // reverse-continue, gdb expects us to skip it. if (result.break_status.breakpoint_hit) { dest = mark(); LOG(debug) << "Found breakpoint break at " << dest; final_result = result; final_tuid = result.break_status.task ? result.break_status.task->tuid() : TaskUid(); final_ticks = result.break_status.task ? result.break_status.task->tick_count() : 0; last_stop_is_watch_or_signal = false; } if (interrupt_check()) { LOG(debug) << "Interrupted at " << end; seek_to_mark(end); final_result = ReplayResult(); final_result.break_status.task = current->current_task(); return final_result; } if (avoidable_stop) { ++stop_count; if (stop_count > stop_count_limit) { Mark before_running = mark(); if (run_forward_to_intermediate_point(end, made_progress_between_stops ? DONT_FORCE_PROGRESS : FORCE_PROGRESS)) { assert(!at_mark(end)); // We made some progress towards |end| with breakpoints/watchpoints // disabled, without reaching |end|. Continue running forward from // here with breakpoints/watchpoints enabled. If we need to seek // backwards again, try resuming from the point where we disabled // breakpoints/watchpoints. if (dest) { restart_points.push_back(start); } restart_points.push_back(before_running); dest = Mark(); break; } } } } } if (last_stop_is_watch_or_signal) { LOG(debug) << "Performing final reverse-singlestep to pass over watch/signal"; auto stop_filter = [&](Task* t) { return t->tuid() == final_tuid; }; reverse_singlestep(dest, final_tuid, final_ticks, stop_filter, interrupt_check); } else { LOG(debug) << "Seeking to final destination " << dest; seek_to_mark(dest); } // fix break_status.task since the actual Task* may have changed // since we saved final_result final_result.break_status.task = current->find_task(final_tuid); // Hide any singlestepping we did, since a continue operation should // never return a singlestep status final_result.break_status.singlestep_complete = false; return final_result; } void ReplayTimeline::update_observable_break_status( ReplayTimeline::Mark& now, const ReplayResult& result) { now = mark(); if (!no_watchpoints_hit_interval_start || !result.break_status.watchpoints_hit.empty()) { no_watchpoints_hit_interval_start = now; } } ReplayResult ReplayTimeline::reverse_singlestep( const Mark& origin, const TaskUid& step_tuid, Ticks step_ticks, const std::function<bool(Task*)>& stop_filter, const std::function<bool()>& interrupt_check) { LOG(debug) << "ReplayTimeline::reverse_singlestep from " << origin; Mark outer = origin; Ticks ticks_target = step_ticks - 1; while (true) { Mark end = outer; Mark start; bool seen_barrier = false; while (true) { MarkKey current_key = end.ptr->key; while (true) { if (end.ptr->key.trace_time != current_key.trace_time || end.ptr->key.ticks != current_key.ticks) { break; } seek_to_before_key(current_key); maybe_add_reverse_exec_checkpoint(EXPECT_SHORT_REVERSE_EXECUTION); if (current_mark_key() == current_key) { // Can't go further back.
Treat this as an exit. LOG(debug) << "Couldn't seek to before " << end << ", returning exit"; ReplayResult result; result.status = REPLAY_EXITED; result.break_status = BreakStatus(); return result; } LOG(debug) << "Seeked backward from " << current_key << " to " << current_mark_key(); current_key = current_mark_key(); } start = mark(); LOG(debug) << "Running forward from " << start; // Now run forward until we're reasonably close to the correct tick value. ReplaySession::StepConstraints constraints(RUN_CONTINUE); bool approaching_ticks_target = false; bool seen_other_task_break = false; while (!at_mark(end)) { Task* t = current->current_task(); if (stop_filter(t) && current->can_validate()) { if (t->tuid() == step_tuid) { if (t->tick_count() >= ticks_target) { // Don't step any further. LOG(debug) << "Approaching ticks target"; approaching_ticks_target = true; break; } unapply_breakpoints_and_watchpoints(); constraints.ticks_target = constraints.command == RUN_CONTINUE ? ticks_target : 0; ReplayResult result; result = current->replay_step(constraints); if (result.break_status.approaching_ticks_target) { LOG(debug) << " approached ticks target at " << current_mark_key(); constraints = ReplaySession::StepConstraints(RUN_SINGLESTEP_FAST_FORWARD); } } else { if (seen_other_task_break) { unapply_breakpoints_and_watchpoints(); } else { apply_breakpoints_and_watchpoints(); } constraints.ticks_target = 0; ReplayResult result = current->replay_step(RUN_CONTINUE); if (result.break_status.any_break()) { seen_other_task_break = true; } } } else { unapply_breakpoints_and_watchpoints(); constraints.ticks_target = 0; current->replay_step(RUN_CONTINUE); } if (is_start_of_reverse_execution_barrier_event()) { seen_barrier = true; } maybe_add_reverse_exec_checkpoint(EXPECT_SHORT_REVERSE_EXECUTION); } if (approaching_ticks_target || seen_barrier) { break; } if (seen_other_task_break) { // We saw a break in another task that the debugger cares about, but // that's not the stepping task. At this point reverse-singlestep // will move back past that break, so We'll need to report that break // instead of the singlestep. return reverse_continue(stop_filter, interrupt_check); } end = start; } assert(stop_filter(current->current_task()) || seen_barrier); Mark destination_candidate; Mark step_start = set_short_checkpoint(); ReplayResult destination_candidate_result; TaskUid destination_candidate_tuid; if (is_start_of_reverse_execution_barrier_event()) { destination_candidate = mark(); destination_candidate_result.break_status.task_exit = true; destination_candidate_tuid = current->current_task()->tuid(); } no_watchpoints_hit_interval_start = Mark(); bool seen_other_task_break = false; while (true) { Mark now; ReplayResult result; if (stop_filter(current->current_task())) { apply_breakpoints_and_watchpoints(); if (current->current_task()->tuid() == step_tuid) { Mark before_step = mark(); ReplaySession::StepConstraints constraints( RUN_SINGLESTEP_FAST_FORWARD); constraints.stop_before_states.push_back(&end.ptr->regs); result = current->replay_step(constraints); update_observable_break_status(now, result); if (result.break_status.breakpoint_hit) { // If we hit a breakpoint while singlestepping, we didn't // make any progress. unapply_breakpoints_and_watchpoints(); result = current->replay_step(constraints); update_observable_break_status(now, result); } if (result.break_status.singlestep_complete) { mark_after_singlestep(before_step, result); if (now > end) { // This last step is not usable. 
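// (In outline, the enclosing search works as follows: seek strictly before
// the target, replay forward one singlestep at a time, and keep a short
// checkpoint at the last completed step that did not overshoot |end|; that
// checkpoint becomes the reverse-singlestep destination. A sketch of that
// shape only; seek_to_strictly_before(), position() and singlestep_once()
// are hypothetical stand-ins for the surrounding machinery:)
#if 0
Mark candidate;                       // last known "one step before origin"
seek_to_strictly_before(origin);
while (position() < origin) {
  Mark here = set_short_checkpoint();
  ReplayResult r = singlestep_once();
  if (r.break_status.singlestep_complete && position() <= origin) {
    candidate = here;                 // stepping from here lands on origin
  }
}
seek_to_mark(candidate);              // state one step before the origin
#endif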
LOG(debug) << " not usable, stopping now"; break; } destination_candidate = step_start; destination_candidate_result = result; destination_candidate_tuid = result.break_status.task->tuid(); seen_other_task_break = false; step_start = now; } } else { result = current->replay_step(RUN_CONTINUE); update_observable_break_status(now, result); if (result.break_status.any_break()) { seen_other_task_break = true; } if (result.break_status.breakpoint_hit) { unapply_breakpoints_and_watchpoints(); result = current->replay_step(RUN_SINGLESTEP_FAST_FORWARD); update_observable_break_status(now, result); if (result.break_status.any_break()) { seen_other_task_break = true; } } } } else { unapply_breakpoints_and_watchpoints(); result = current->replay_step(RUN_CONTINUE); no_watchpoints_hit_interval_start = Mark(); now = mark(); } if (is_start_of_reverse_execution_barrier_event()) { destination_candidate = mark(); destination_candidate_result = result; destination_candidate_result.break_status.task_exit = true; destination_candidate_tuid = current->current_task()->tuid(); seen_other_task_break = false; } if (now >= end) { break; } maybe_add_reverse_exec_checkpoint(EXPECT_SHORT_REVERSE_EXECUTION); } no_watchpoints_hit_interval_end = no_watchpoints_hit_interval_start ? end : Mark(); if (seen_other_task_break) { // We saw a break in another task that the debugger cares about, but // that's not the stepping task. Report that break instead of the // singlestep. return reverse_continue(stop_filter, interrupt_check); } if (destination_candidate) { LOG(debug) << "Found destination " << destination_candidate; seek_to_mark(destination_candidate); destination_candidate_result.break_status.task = current->find_task(destination_candidate_tuid); assert(destination_candidate_result.break_status.task); evaluate_conditions(destination_candidate_result); return destination_candidate_result; } // No destination candidate found. Search further backward. 
outer = start; } } void ReplayTimeline::evaluate_conditions(ReplayResult& result) { Task* t = result.break_status.task; if (!t) { return; } auto auid = t->vm()->uid(); if (result.break_status.breakpoint_hit) { auto addr = t->ip(); auto it = breakpoints.lower_bound(make_tuple(auid, addr, nullptr)); bool hit = false; while (it != breakpoints.end() && get<0>(*it) == auid && get<1>(*it) == addr) { const unique_ptr& cond = get<2>(*it); if (!cond || cond->evaluate(t)) { hit = true; break; } ++it; } if (!hit) { result.break_status.breakpoint_hit = false; } } for (auto i = result.break_status.watchpoints_hit.begin(); i != result.break_status.watchpoints_hit.end(); ++i) { auto& w = *i; auto it = watchpoints.lower_bound( make_tuple(auid, w.addr, w.num_bytes, w.type, nullptr)); bool hit = false; while (it != watchpoints.end() && get<0>(*it) == auid && get<1>(*it) == w.addr && get<2>(*it) == w.num_bytes && get<3>(*it) == w.type) { const unique_ptr& cond = get<4>(*it); if (!cond || cond->evaluate(t)) { hit = true; break; } ++it; } if (!hit) { i = result.break_status.watchpoints_hit.erase(i); } } } ReplayResult ReplayTimeline::replay_step_forward( RunCommand command, TraceFrame::Time stop_at_time, const std::function& interrupt_check) { assert(command != RUN_SINGLESTEP_FAST_FORWARD); ReplayResult result; apply_breakpoints_and_watchpoints(); ProtoMark before = proto_mark(); current->set_visible_execution(true); ReplaySession::StepConstraints constraints(command); constraints.stop_at_time = stop_at_time; result = current->replay_step(constraints); current->set_visible_execution(false); if (command == RUN_CONTINUE) { // Since it's easy for us to fix the coalescing quirk for forward // execution, we may as well do so. It's nice to have forward execution // behave consistently with reverse execution. fix_watchpoint_coalescing_quirk(result, before); // Hide any singlestepping we did result.break_status.singlestep_complete = false; } maybe_add_reverse_exec_checkpoint(LOW_OVERHEAD); bool did_hit_breakpoint = result.break_status.breakpoint_hit; evaluate_conditions(result); if (did_hit_breakpoint && !result.break_status.any_break()) { // Singlestep past the breakpoint current->set_visible_execution(true); result = singlestep_with_breakpoints_disabled(); if (command == RUN_CONTINUE) { result.break_status.singlestep_complete = false; } current->set_visible_execution(false); } return result; } ReplayResult ReplayTimeline::reverse_singlestep( const TaskUid& tuid, Ticks tuid_ticks, const std::function& stop_filter, const std::function& interrupt_check) { return reverse_singlestep(mark(), tuid, tuid_ticks, stop_filter, interrupt_check); } ReplayTimeline::Progress ReplayTimeline::estimate_progress() { Session::Statistics stats = current->statistics(); // The following parameters were estimated by running Firefox startup // and shutdown in an opt build on a Lenovo W530 laptop, replaying with // DUMP_STATS_PERIOD set to 100 (twice, and using only values from the // second run, to ensure caches are warm), and then minimizing least-squares // error. 
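  // For a sense of scale, with the constants below an execution that retired
  // 1,000,000 ticks, made 100 syscalls and wrote 100,000 bytes is estimated
  // at roughly 0.00205 * 1e6 + 39.68 * 100 + 0.00183 * 1e5 + 998, i.e. about
  // 7200 microseconds (~7 ms) of replay time.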
static const double microseconds_per_tick = 0.0020503143; static const double microseconds_per_syscall = 39.6793587609; static const double microseconds_per_byte_written = 0.001833611; static const double microseconds_constant = 997.8257239043; return Progress(microseconds_per_tick * stats.ticks_processed + microseconds_per_syscall * stats.syscalls_performed + microseconds_per_byte_written * stats.bytes_written + microseconds_constant); } /* * Checkpointing strategy: * * We define a series of intervals of increasing length, each one ending at * the current replay position. In each interval N, we allow at most N * checkpoints. We ensure that interval lengths grow exponentially (in the * limit), so the maximum number of checkpoints for a given execution length * L is O(log L). * * Interval N has length inter_checkpoint_interval to the * power of checkpoint_interval_exponent. * We allow at most N checkpoints in interval N. * To discard excess checkpoints, first pick the smallest interval N with * too many checkpoints, and discard the latest checkpoint in interval N * that is not in interval N-1. Repeat until there are no excess checkpoints. * All checkpoints after the current replay point are always discarded. * The script checkpoint-visualizer.html simulates this algorithm and * visualizes its results. * The implementation here is quite naive, but that's OK because we will * never have a large number of checkpoints. */ /** * Try to space out our checkpoints by a minimum of this much in LOW_OVERHEAD * mode. * This is currently aiming for about 0.5s of replay time, so a reverse step or * continue whose destination is within 0.5 should take at most a second. * Also, based on a guesstimate that taking checkpoints of Firefox requires * about 50ms, this would make checkpointing overhead about 10% of replay time, * which sounds reasonable. */ static ReplayTimeline::Progress low_overhead_inter_checkpoint_interval = 500000; /** * Space out checkpoints linearly by this much in * EXPECT_SHORT_REVERSE_EXECUTION mode, until we reach * low_overhead_inter_checkpoint_interval. */ static ReplayTimeline::Progress expecting_reverse_exec_inter_checkpoint_interval = 100000; /** * Make each interval this much bigger than the previous. */ static float checkpoint_interval_exponent = 2; static ReplayTimeline::Progress inter_checkpoint_interval( ReplayTimeline::CheckpointStrategy strategy) { return strategy == ReplayTimeline::LOW_OVERHEAD ? low_overhead_inter_checkpoint_interval : expecting_reverse_exec_inter_checkpoint_interval; } static ReplayTimeline::Progress next_interval_length( ReplayTimeline::Progress len) { if (len >= low_overhead_inter_checkpoint_interval) { return (ReplayTimeline::Progress)ceil(checkpoint_interval_exponent * len); } return len + expecting_reverse_exec_inter_checkpoint_interval; } void ReplayTimeline::maybe_add_reverse_exec_checkpoint( CheckpointStrategy strategy) { discard_future_reverse_exec_checkpoints(); Progress now = estimate_progress(); auto it = reverse_exec_checkpoints.rbegin(); if (it != reverse_exec_checkpoints.rend() && it->second >= now - inter_checkpoint_interval(strategy)) { // Latest checkpoint is close enough; we don't need to do anything. return; } if (!current->can_clone()) { // We can't create a checkpoint right now. return; } // We always discard checkpoints before adding the new one to reduce the // maximum checkpoint count by one. 
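  // (To see the O(log L) bound from the strategy comment above: interval
  // lengths produced by next_interval_length() grow linearly until they pass
  // low_overhead_inter_checkpoint_interval and geometrically afterwards, so
  // the number of intervals covering an execution of length L, and hence the
  // number of checkpoints we retain, grows logarithmically. A sketch only;
  // |L| is a hypothetical execution length:)
#if 0
  int n = 0;
  for (Progress len = inter_checkpoint_interval(strategy); len < L;
       len = next_interval_length(len)) {
    ++n; // lengths roughly double once past the LOW_OVERHEAD spacing
  }
  // n is O(log L): an upper bound on the checkpoints kept for this range.
#endif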
discard_past_reverse_exec_checkpoints(strategy); Mark m = add_explicit_checkpoint(); LOG(debug) << "Creating reverse-exec checkpoint at " << m; reverse_exec_checkpoints[m] = now; } void ReplayTimeline::discard_future_reverse_exec_checkpoints() { Progress now = estimate_progress(); while (true) { auto it = reverse_exec_checkpoints.rbegin(); if (it == reverse_exec_checkpoints.rend() || it->second <= now) { break; } LOG(debug) << "Discarding reverse-exec future checkpoint at " << *it->first.ptr; remove_explicit_checkpoint(it->first); reverse_exec_checkpoints.erase(it->first); } } void ReplayTimeline::discard_past_reverse_exec_checkpoints( CheckpointStrategy strategy) { Progress now = estimate_progress(); // No checkpoints are allowed in the first interval, since we're about to // add one there. int checkpoints_allowed = 0; int checkpoints_in_range = 0; auto it = reverse_exec_checkpoints.rbegin(); vector checkpoints_to_delete; for (Progress len = inter_checkpoint_interval(strategy);; len = next_interval_length(len)) { Progress start = now - len; // Count checkpoints >= start, starting at 'it', and leave the first // checkpoint entry < start in 'tmp_it'. auto tmp_it = it; while (tmp_it != reverse_exec_checkpoints.rend() && tmp_it->second >= start) { ++checkpoints_in_range; ++tmp_it; } // Delete excess checkpoints starting with 'it'. while (checkpoints_in_range > checkpoints_allowed) { checkpoints_to_delete.push_back(it->first); --checkpoints_in_range; ++it; } ++checkpoints_allowed; it = tmp_it; if (it == reverse_exec_checkpoints.rend()) { break; } } for (auto& m : checkpoints_to_delete) { LOG(debug) << "Discarding reverse-exec checkpoint at " << m; remove_explicit_checkpoint(m); reverse_exec_checkpoints.erase(m); } } ReplayTimeline::Mark ReplayTimeline::set_short_checkpoint() { if (!can_add_checkpoint()) { return mark(); } // Add checkpoint before removing one in case m == // reverse_exec_short_checkpoint Mark m = add_explicit_checkpoint(); LOG(debug) << "Creating short-checkpoint at " << m; if (reverse_exec_short_checkpoint) { LOG(debug) << "Discarding old short-checkpoint at " << reverse_exec_short_checkpoint; remove_explicit_checkpoint(reverse_exec_short_checkpoint); } swap(m, reverse_exec_short_checkpoint); return reverse_exec_short_checkpoint; } rr-4.1.0/src/ReplayTimeline.h000066400000000000000000000440431265436462100160400ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_REPLAY_TIMELINE_H_ #define RR_REPLAY_TIMELINE_H_ #include #include #include #include #include #include "BreakpointCondition.h" #include "Registers.h" #include "ReplaySession.h" #include "TraceFrame.h" enum RunDirection { RUN_FORWARD, RUN_BACKWARD }; /** * This class manages a set of ReplaySessions corresponding to different points * in the same recording. It provides an API for explicitly managing * checkpoints along this timeline and navigating to specific events. */ class ReplayTimeline { private: struct InternalMark; public: ReplayTimeline(std::shared_ptr session, const ReplaySession::Flags& session_flags); ReplayTimeline() : breakpoints_applied(false) {} ~ReplayTimeline(); bool is_running() const { return current != nullptr; } /** * An estimate of how much progress a session has made. This should roughly * correlate to the time required to replay from the start of a session * to the current point, in microseconds. */ typedef int64_t Progress; /** * A Mark references a precise point in time during the replay. 
* It may or may not have an associated ReplaySession checkpoint. */ class Mark { public: Mark() {} bool operator<(const Mark& other) const { return ReplayTimeline::less_than(*this, other); } bool operator>(const Mark& other) const { return other < *this; } bool operator<=(const Mark& other) const { return !(*this > other); } bool operator>=(const Mark& other) const { return !(*this < other); } bool operator==(const Mark& other) const { return ptr == other.ptr; } bool operator!=(const Mark& other) const { return !(*this == other); } operator bool() const { return ptr != nullptr; } /** * Return the values of the general-purpose registers at this mark. */ const Registers& regs() const { return ptr->regs; } const ExtraRegisters& extra_regs() const { return ptr->extra_regs; } private: friend class ReplayTimeline; friend std::ostream& operator<<(std::ostream& s, const Mark& o); Mark(std::shared_ptr& weak) { swap(ptr, weak); } std::shared_ptr ptr; }; /** * The current state. The current state can be moved forward or backward * using ReplaySession's APIs. Do not set breakpoints on its tasks directly. * Use ReplayTimeline's breakpoint methods. */ ReplaySession& current_session() { return *current; } /** * Return a mark for the current state. A checkpoint need not be retained, * but this mark can be seeked to later. * This can be expensive in some (perhaps unusual) situations since we * may need to clone the current session and run it a bit, to figure out * where we are relative to other Marks. So don't call this unless you * need it. */ Mark mark(); /** * Indicates that the current replay position is the result of * singlestepping from 'from'. */ void mark_after_singlestep(const Mark& from, const ReplayResult& result); /** * Returns true if it's safe to add a checkpoint here. */ bool can_add_checkpoint() { return current->can_clone(); } /** * Ensure that the current session is explicitly checkpointed. * Explicit checkpoints are reference counted. * Only call this if can_add_checkpoint would return true. */ Mark add_explicit_checkpoint(); /** * Remove an explicit checkpoint reference count for this mark. */ void remove_explicit_checkpoint(const Mark& mark); /** * Return true if we're currently at the given mark. */ bool at_mark(const Mark& mark) { return current_mark() == mark.ptr; } // Add/remove breakpoints and watchpoints. Use these APIs instead // of operating on the task directly, so that ReplayTimeline can track // breakpoints and automatically move them across sessions as necessary. // Only one breakpoint for a given address space/addr combination can be set; // setting another for the same address space/addr will replace the first. // Likewise only one watchpoint for a given task/addr/num_bytes/type can be // set. gdb expects that setting two breakpoints on the same address and then // removing one removes both. bool add_breakpoint(Task* t, remote_code_ptr addr, std::unique_ptr condition = nullptr); // You can't remove a breakpoint with a specific condition, so don't // place multiple breakpoints with conditions on the same location. void remove_breakpoint(Task* t, remote_code_ptr addr); bool add_watchpoint(Task* t, remote_ptr addr, size_t num_bytes, WatchType type, std::unique_ptr condition = nullptr); // You can't remove a watchpoint with a specific condition, so don't // place multiple breakpoints with conditions on the same location. 
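  // A condition can be any predicate over the stopped task. As an
  // illustration (a sketch, assuming BreakpointCondition exposes the
  // evaluate(Task*) hook that evaluate_conditions() calls; the class and
  // watched address below are hypothetical):
#if 0
  class NonzeroWordCondition : public BreakpointCondition {
  public:
    explicit NonzeroWordCondition(remote_ptr<int> addr) : addr(addr) {}
    bool evaluate(Task* t) const {
      bool ok = true;
      int value = t->read_mem(addr, &ok); // read the word from tracee memory
      return ok && value != 0;            // only report the stop when nonzero
    }
  private:
    remote_ptr<int> addr;
  };
#endif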
void remove_watchpoint(Task* t, remote_ptr addr, size_t num_bytes, WatchType type); void remove_breakpoints_and_watchpoints(); bool has_breakpoint_at_address(Task* t, remote_code_ptr addr); bool has_watchpoint_at_address(Task* t, remote_ptr addr, size_t num_bytes, WatchType type); /** * Ensure that reverse execution never proceeds into an event before * |event|. Reverse execution will stop with a |task_exit| break status when * at the beginning of this event. */ void set_reverse_execution_barrier_event(TraceFrame::Time event) { reverse_execution_barrier_event = event; } // State-changing APIs. These may alter state associated with // current_session(). /** * Reset the current session to the last available session before event * 'time'. Useful if you want to run up to that event. */ void seek_to_before_event(TraceFrame::Time time) { return seek_to_before_key(MarkKey(time, 0, ReplayStepKey())); } /** * Reset the current session to the last checkpointed session before (or at) * the mark. Will return at the mark if this mark was explicitly checkpointed * previously (and not deleted). */ void seek_up_to_mark(const Mark& mark); /** * Sets current session to 'mark' by restoring the nearest useful checkpoint * and executing forwards if necessary. */ void seek_to_mark(const Mark& mark); static bool never_interrupt() { return false; } /** * Replay 'current'. * If there is a breakpoint at the current task's current ip(), then * when running forward we will immediately break at the breakpoint. When * running backward we will ignore the initial "hit" of the breakpoint --- * this is the behavior gdb expects. * Likewise, if there is a breakpoint at the current task's current ip(), * then running forward will immediately break at the breakpoint, but * running backward will ignore the initial "hit" of the breakpoint; this is * what gdb expects. * * replay_step_forward only does one replay step. That means we'll only * execute code in current_session().current_task(). */ ReplayResult replay_step_forward( RunCommand command, TraceFrame::Time stop_at_time, const std::function& interrupt_check = never_interrupt); ReplayResult reverse_continue(const std::function& stop_filter, const std::function& interrupt_check); ReplayResult reverse_singlestep( const TaskUid& tuid, Ticks tuid_ticks, const std::function& stop_filter, const std::function& interrupt_check); /** * Try to identify an existing Mark which is known to be one singlestep * before 'from', and for which we know singlestepping to 'from' would * trigger no break statuses other than "singlestep_complete". * If we can't, return a null Mark. * Will only return a Mark for the same executing task as 'from', which * must be 't'. */ Mark lazy_reverse_singlestep(const Mark& from, Task* t); /** * Different strategies for placing automatic checkpoints. */ enum CheckpointStrategy { /** * Use this when we want to bound the overhead of checkpointing to be * insignificant relative to the cost of forward execution. */ LOW_OVERHEAD, /** * Use this when we expect reverse execution to happen soon, to a * destination not far behind the current execution point. In this case * it's worth increasing checkpoint density. * We pass this when we have opportunities to make checkpoints during * reverse_continue or reverse_singlestep, since it's common for short * reverse-executions to follow other reverse-execution. */ EXPECT_SHORT_REVERSE_EXECUTION }; /** * We track the set of breakpoints/watchpoints requested by the client. 
* When we switch to a new ReplaySession, these need to be reapplied before * replaying that session, but we do this lazily. * apply_breakpoints_and_watchpoints() forces the breakpoints/watchpoints * to be applied to the current session. * Our checkpoints never have breakpoints applied. */ void apply_breakpoints_and_watchpoints(); private: /** * TraceFrame::Time + Ticks + ReplayStepKey does not uniquely identify * a program state, but they're intrinsically totally ordered. */ struct MarkKey { MarkKey(TraceFrame::Time trace_time, Ticks ticks, ReplayStepKey step_key) : trace_time(trace_time), ticks(ticks), step_key(step_key) {} MarkKey(const MarkKey& other) = default; TraceFrame::Time trace_time; Ticks ticks; ReplayStepKey step_key; bool operator<(const MarkKey& other) const { if (trace_time < other.trace_time) { return true; } if (trace_time > other.trace_time) { return false; } if (ticks < other.ticks) { return true; } if (ticks > other.ticks) { return false; } return step_key < other.step_key; } bool operator>(const MarkKey& other) const { return other < *this; } bool operator>=(const MarkKey& other) const { return !(*this < other); } bool operator<=(const MarkKey& other) const { return !(other < *this); } bool operator==(const MarkKey& other) const { return trace_time == other.trace_time && ticks == other.ticks && step_key == other.step_key; } bool operator!=(const MarkKey& other) const { return !(*this == other); } }; friend std::ostream& operator<<(std::ostream& s, const MarkKey& o); /** * All the information we'll need to construct a mark lazily. */ struct ProtoMark { ProtoMark(const MarkKey& key, Task* t) : key(key), regs(t->regs()), return_addresses(t->return_addresses()) {} bool equal_states(ReplaySession& session) const; MarkKey key; Registers regs; ReturnAddressList return_addresses; }; /** * MarkKey + Registers are assumed to identify a unique program state. * We can't order these states directly based on this data, so we have to * record the ordering in the ReplayTimeline. */ struct InternalMark { InternalMark(ReplayTimeline* owner, ReplaySession& session, const MarkKey& key) : owner(owner), key(key), ticks_at_event_start(session.ticks_at_start_of_current_event()), checkpoint_refcount(0), singlestep_to_next_mark_no_signal(false) { Task* t = session.current_task(); if (t) { regs = t->regs(); return_addresses = t->return_addresses(); extra_regs = t->extra_regs(); } } ~InternalMark(); bool operator<(const std::shared_ptr other); bool equal_states(ReplaySession& session) const; ReplayTimeline* owner; MarkKey key; Registers regs; ExtraRegisters extra_regs; ReturnAddressList return_addresses; ReplaySession::shr_ptr checkpoint; Ticks ticks_at_event_start; uint32_t checkpoint_refcount; // The next InternalMark in the mark vector is the result of singlestepping // from this mark *and* no signal is reported in the break_status. bool singlestep_to_next_mark_no_signal; }; friend struct InternalMark; friend std::ostream& operator<<(std::ostream& s, const InternalMark& o); friend std::ostream& operator<<(std::ostream& s, const ProtoMark& o); /** * unapply_breakpoints_and_watchpoints() forces the breakpoints/watchpoints * to not be applied to the current session. Use this when we need to * clone the current session or replay the current session without * triggering breakpoints. */ void unapply_breakpoints_and_watchpoints(); static MarkKey session_mark_key(ReplaySession& session) { Task* t = session.current_task(); return MarkKey(session.trace_reader().time(), t ? 
t->tick_count() : 0, session.current_step_key()); } MarkKey current_mark_key() const { return session_mark_key(*current); } ProtoMark proto_mark() const; void seek_to_proto_mark(const ProtoMark& pmark); // Returns a shared pointer to the mark if there is one for the current state. std::shared_ptr current_mark(); void remove_mark_with_checkpoint(const MarkKey& key); void seek_to_before_key(const MarkKey& key); enum ForceProgress { FORCE_PROGRESS, DONT_FORCE_PROGRESS }; // Run forward towards the midpoint of the current position and |end|. // Must stop before we reach |end|. // Returns false if we made no progress. bool run_forward_to_intermediate_point(const Mark& end, ForceProgress force); struct ReplayStepToMarkStrategy { ReplayStepToMarkStrategy() : singlesteps_to_perform(0) {} ReplaySession::StepConstraints setup_step_constraints(); uint32_t singlesteps_to_perform; }; void update_strategy_and_fix_watchpoint_quirk( ReplayStepToMarkStrategy& strategy, const ReplaySession::StepConstraints& constraints, ReplayResult& result, const ProtoMark& before); // Take a single replay step towards |mark|. Stop before or at |mark|, and // stop if any breakpoint/watchpoint/signal is hit. // Maintain current strategy state in |strategy|. Passing the same // |strategy| object to consecutive replay_step_to_mark invocations helps // optimize performance. ReplayResult replay_step_to_mark(const Mark& mark, ReplayStepToMarkStrategy& strategy); ReplayResult singlestep_with_breakpoints_disabled(); bool fix_watchpoint_coalescing_quirk(ReplayResult& result, const ProtoMark& before); Mark find_singlestep_before(const Mark& mark); bool is_start_of_reverse_execution_barrier_event(); void update_observable_break_status(ReplayTimeline::Mark& now, const ReplayResult& result); ReplayResult reverse_singlestep( const Mark& origin, const TaskUid& step_tuid, Ticks step_ticks, const std::function& stop_filter, const std::function& interrupt_check); // Reasonably fast since it just relies on checking the mark map. static bool less_than(const Mark& m1, const Mark& m2); Progress estimate_progress(); /** * Called when the current session has moved forward to a new execution * point and we might want to make a checkpoint to support reverse-execution. * If this adds a checkpoint, it will call * discard_past_reverse_exec_checkpoints * first. */ void maybe_add_reverse_exec_checkpoint(CheckpointStrategy strategy); /** * Discard some reverse-exec checkpoints in the past, if necessary. We do * this to stop the number of checkpoints growing out of control. */ void discard_past_reverse_exec_checkpoints(CheckpointStrategy strategy); /** * Discard all reverse-exec checkpoints that are in the future (they're * useless). */ void discard_future_reverse_exec_checkpoints(); Mark set_short_checkpoint(); /** * If result.break_status hit watchpoints or breakpoints, evaluate their * conditions and clear the break_status flags if the conditions don't hold. */ void evaluate_conditions(ReplayResult& result); ReplaySession::Flags session_flags; ReplaySession::shr_ptr current; // current is known to be at or after this mark std::shared_ptr current_at_or_after_mark; /** * All known marks. * * An InternalMark appears in a ReplayTimeline 'marks' map if and only if * that ReplayTimeline is the InternalMark's 'owner'. ReplayTimeline's * destructor clears the 'owner' of all marks in the map. * * For each MarkKey, the InternalMarks are stored in execution order. * * We assume there will be a limited number of InternalMarks per MarkKey. 
   * This should be true because Task::tick_count() should increment
   * frequently during execution. In some cases we see hundreds of elements
   * but that's not too bad.
   */
  std::map<MarkKey, std::vector<std::shared_ptr<InternalMark> > > marks;

  /**
   * All mark keys with at least one checkpoint. The value is the number of
   * checkpoints. There can be multiple checkpoints for a given MarkKey
   * because a MarkKey may have multiple corresponding Marks.
   */
  std::map<MarkKey, uint32_t> marks_with_checkpoints;

  std::set<std::tuple<AddressSpaceUid, remote_code_ptr,
                      std::unique_ptr<BreakpointCondition> > > breakpoints;
  std::set<std::tuple<AddressSpaceUid, remote_ptr<void>, size_t, WatchType,
                      std::unique_ptr<BreakpointCondition> > > watchpoints;
  bool breakpoints_applied;

  TraceFrame::Time reverse_execution_barrier_event;

  /**
   * Checkpoints used to accelerate reverse execution.
   */
  std::map<Mark, Progress> reverse_exec_checkpoints;

  /**
   * When these are non-null, then when singlestepping from
   * no_watchpoints_hit_interval_start to no_watchpoints_hit_interval_end,
   * none of the currently set watchpoints fire.
   */
  Mark no_watchpoints_hit_interval_start;
  Mark no_watchpoints_hit_interval_end;

  /**
   * A single checkpoint that's very close to the current point, used to
   * accelerate a sequence of reverse singlestep operations.
   */
  Mark reverse_exec_short_checkpoint;
};

std::ostream& operator<<(std::ostream& s, const ReplayTimeline::Mark& o);

#endif // RR_REPLAY_TIMELINE_H_
rr-4.1.0/src/Scheduler.cc000066400000000000000000000202011265436462100151610ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

//#define DEBUGTAG "Scheduler"
//#define MONITOR_UNSWITCHABLE_WAITS

#include "Scheduler.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "Flags.h"
#include "log.h"
#include "RecordSession.h"
#include "task.h"

using namespace std;

static void note_switch(Task* prev_t, Task* t, int max_events) {
  if (prev_t == t) {
    t->succ_event_counter++;
  } else {
    t->succ_event_counter = 0;
  }
}

Task* Scheduler::get_next_task_with_same_priority(Task* t) {
  if (t->in_round_robin_queue) {
    return nullptr;
  }
  auto it = task_priority_set.find(make_pair(t->priority, t));
  assert(it != task_priority_set.end());
  ++it;
  if (it == task_priority_set.end() || it->first != t->priority) {
    it = task_priority_set.lower_bound(make_pair(t->priority, nullptr));
  }
  return it->second;
}

/**
 * Returns true if we should return t as the runnable task. Otherwise we
 * should check the next task.
 */
static bool is_task_runnable(Task* t, bool* by_waitpid) {
  if (t->unstable) {
    LOG(debug) << "  " << t->tid << " is unstable, doing waitpid(-1)";
    return true;
  }
  if (!t->may_be_blocked()) {
    LOG(debug) << "  " << t->tid << " isn't blocked";
    return true;
  }
  if (t->emulated_stop_type != NOT_STOPPED) {
    LOG(debug) << "  " << t->tid << " is stopped by ptrace or signal";
    return false;
  }

  LOG(debug) << "  " << t->tid << " is blocked on " << t->ev()
             << "; checking status ...";
  bool did_wait_for_t;
  if (t->pseudo_blocked) {
    t->wait();
    did_wait_for_t = true;
  } else {
    did_wait_for_t = t->try_wait();
  }
  if (did_wait_for_t) {
    t->pseudo_blocked = false;
    *by_waitpid = true;
    LOG(debug) << "  ready with status " << HEX(t->status());
    return true;
  }
  LOG(debug) << "  still blocked";
  // Try next task
  return false;
}

Task* Scheduler::find_next_runnable_task(bool* by_waitpid) {
  *by_waitpid = false;

  while (true) {
    Task* t = get_next_round_robin_task();
    if (!t) {
      break;
    }
    LOG(debug) << "Choosing task " << t->tid << " from yield queue";
    if (is_task_runnable(t, by_waitpid)) {
      return t;
    }
    // This task had its chance to run but couldn't. Move to the
    // next task in the queue.
    remove_round_robin_task();
  }

  // The outer loop has one iteration per unique priority value.
// The inner loop iterates over all tasks with that priority. for (auto same_priority_start = task_priority_set.begin(); same_priority_start != task_priority_set.end();) { int priority = same_priority_start->first; auto same_priority_end = task_priority_set.lower_bound( make_pair(same_priority_start->first + 1, nullptr)); auto begin_at = same_priority_start; if (current && priority == current->priority) { begin_at = task_priority_set.find(make_pair(priority, current)); } auto task_iterator = begin_at; do { Task* t = task_iterator->second; if (is_task_runnable(t, by_waitpid)) { return t; } ++task_iterator; if (task_iterator == same_priority_end) { task_iterator = same_priority_start; } } while (task_iterator != begin_at); same_priority_start = same_priority_end; } return nullptr; } #ifdef MONITOR_UNSWITCHABLE_WAITS /** * Get the current time from the preferred monotonic clock in units of * seconds, relative to an unspecific point in the past. */ static double now_sec(void) { struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); return (double)tp.tv_sec + (double)tp.tv_nsec / 1e9; } #endif Task* Scheduler::get_next_thread(Task* t, Switchable switchable, bool* by_waitpid) { LOG(debug) << "Scheduling next task"; *by_waitpid = false; if (!current) { current = t; } assert(!t || t == current); if (t && switchable == PREVENT_SWITCH) { LOG(debug) << " (" << current->tid << " is un-switchable at " << current->ev() << ")"; if (current->is_running()) { LOG(debug) << " and running; waiting for state change"; /* |current| is un-switchable, but already running. Wait for it to change state * before "scheduling it", so avoid busy-waiting with our client. */ #ifdef MONITOR_UNSWITCHABLE_WAITS double start = now_sec(), wait_duration; #endif current->wait(); #ifdef MONITOR_UNSWITCHABLE_WAITS wait_duration = now_sec() - start; if (wait_duration >= 0.010) { log_warn("Waiting for unswitchable %s took %g ms", strevent(current->event), 1000.0 * wait_duration); } #endif *by_waitpid = true; LOG(debug) << " new status is " << HEX(current->status()); } return current; } /* Prefer switching to the next task if the current one * exceeded its event limit. */ if (current && current->succ_event_counter > max_events) { LOG(debug) << " previous task exceeded event limit, preferring next"; current->succ_event_counter = 0; if (current == get_next_round_robin_task()) { remove_round_robin_task(); } current = get_next_task_with_same_priority(current); } Task* next = find_next_runnable_task(by_waitpid); if (next && !next->unstable) { LOG(debug) << " selecting task " << next->tid; } else { // All the tasks are blocked (or we found an unstable-exit task). // Wait for the next one to change state. int status; pid_t tid; LOG(debug) << " all tasks blocked or some unstable, waiting for runnable (" << task_priority_set.size() << " total)"; do { tid = waitpid(-1, &status, __WALL | WSTOPPED | WUNTRACED); if (-1 == tid) { if (EINTR == errno) { LOG(debug) << " waitpid(-1) interrupted"; return nullptr; } FATAL() << "Failed to waitpid()"; } LOG(debug) << " " << tid << " changed status to " << HEX(status); next = session.find_task(tid); if (!next) { LOG(debug) << " ... 
but it's dead"; } } while (!next); ASSERT(next, next->unstable || next->may_be_blocked() || Task::ptrace_event_from_status(status) == PTRACE_EVENT_EXIT) << "Scheduled task should have been blocked or unstable"; next->did_waitpid(status); *by_waitpid = true; } note_switch(current, next, max_events); current = next; return current; } void Scheduler::on_create(Task* t) { assert(!t->in_round_robin_queue); task_priority_set.insert(make_pair(t->priority, t)); } void Scheduler::on_destroy(Task* t) { if (t == current) { current = get_next_task_with_same_priority(t); if (t == current) { current = nullptr; } } if (t->in_round_robin_queue) { auto iter = find(task_round_robin_queue.begin(), task_round_robin_queue.end(), t); task_round_robin_queue.erase(iter); } else { task_priority_set.erase(make_pair(t->priority, t)); } } void Scheduler::update_task_priority(Task* t, int value) { if (t->priority == value) { return; } if (t->in_round_robin_queue) { t->priority = value; return; } task_priority_set.erase(make_pair(t->priority, t)); t->priority = value; task_priority_set.insert(make_pair(t->priority, t)); } void Scheduler::schedule_one_round_robin(Task* t) { if (!task_round_robin_queue.empty()) { return; } for (auto iter : task_priority_set) { if (iter.second != t) { task_round_robin_queue.push_back(iter.second); iter.second->in_round_robin_queue = true; } } task_round_robin_queue.push_back(t); t->in_round_robin_queue = true; task_priority_set.clear(); } Task* Scheduler::get_next_round_robin_task() { if (task_round_robin_queue.empty()) { return nullptr; } return task_round_robin_queue.front(); } void Scheduler::remove_round_robin_task() { assert(!task_round_robin_queue.empty()); Task* t = task_round_robin_queue.front(); task_round_robin_queue.pop_front(); if (t) { t->in_round_robin_queue = false; task_priority_set.insert(make_pair(t->priority, t)); } } rr-4.1.0/src/Scheduler.h000066400000000000000000000154311265436462100150320ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_REC_SCHED_H_ #define RR_REC_SCHED_H_ #include #include #include "Ticks.h" #include "TraceFrame.h" #include "util.h" class RecordSession; class Task; /** * Overview of rr scheduling: * * rr honours priorities set by setpriority(2) --- even in situations where the * kernel doesn't, e.g. when a non-privileged task tries to increase its * priority. Normally rr honors priorities strictly by scheduling the highest * priority runnable task; tasks with equal priorities are scheduled in * round-robin fashion. Strict priority scheduling helps find bugs due to * starvation. * * When a task calls sched_yield we temporarily switch to a completely * fair scheduler that ignores priorities. All tasks are placed on a queue * and while the queue is non-empty we take the next task from the queue and * run it for a quantum if it's runnable. We do this because tasks calling * sched_yield are often expecting some kind of fair scheduling and may deadlock * (e.g. trying to acquire a spinlock) if some other tasks don't get a chance * to run. */ class Scheduler { public: /** * The following parameters define the default scheduling parameters. * The recorder scheduler basically works as follows * * 0. Find a task A with a pending event. * 1. If A was the last task scheduled, decrease its "max-event" * counter. * 2. Program an HPC interrupt for A that will fire after "max-ticks" * retired conditional branches (or so, it may not be precise). * 3. Resume the execution of A. 
* * The next thing that will occur is another scheduling event, after * which one of two things happens * * 0. Task A triggers a trace event in rr, which could be a signal, * syscall entry/exit, HPC interrupt, ... * 1. Some other task triggers an event. * * And then we make another scheduling decision. * * Like in most task schedulers, there are conflicting goals to * balance. Lower max-ticks / max-events generally makes the * application more "interactive", generally speaking lower latency. * (And wrt catching bugs, this setting generally creates more * opportunity for bugs to arise in multi-threaded/process * applications.) This comes at the cost of more overhead from * scheduling and context switching. Higher max-ticks / max-events * generally gives the application higher throughput. * * The rr scheduler is relatively dumb compared to modern OS * schedulers, but the default parameters are configured to achieve * * o IO-heavy tasks are relatively quickly switched, in the hope this * improves latency. * o CPU-heavy tasks are given an O(10ms) timeslice before being * switched. * o Keep max number of HPC interrupts small to avoid overhead. * * In addition to all the aforementioned deficiencies, using retired * conditional branches to compute timeslices is quite crude, since * they don't correspond to any unit of time in general. Hopefully * that can be improved, but empirical data from Firefox demonstrate, * surprisingly consistently, a distribution of insns/rcb massed * around 10. Somewhat arbitrarily guessing ~4cycles/insn on average * (fair amount of pointer chasing), that implies * * 10ms = .01s = x rcb * (10insn / rcb) * (4cycle / insn) * (1s / 2e9cycle) * x = 500000rcb / 10ms * * We'll arbitrarily decide to allow 10 max successive events for * latency reasons. To try to keep overhead lower (since trace traps * are heavyweight), we'll give each task a relatively large 50ms * timeslice. This works out to * * 50ms * (500000rcb / 10ms) / 10event = 250000 rcb / event */ enum { DEFAULT_MAX_TICKS = 250000 }; enum { DEFAULT_MAX_EVENTS = 10 }; Scheduler(RecordSession& session) : session(session), current(nullptr), max_ticks_(DEFAULT_MAX_TICKS), max_events(DEFAULT_MAX_EVENTS) {} void set_max_ticks(Ticks max_ticks) { max_ticks_ = max_ticks; } Ticks max_ticks() const { return max_ticks_; } void set_max_events(TraceFrame::Time max_events) { this->max_events = max_events; } /** * Given a previously-scheduled task |t|, return a new runnable task (which * may be |t|). * * The returned task is guaranteed to either have already been * runnable, or have been made runnable by a waitpid status change (in * which case, *by_waitpid will be nonzero.) * * Return nullptr if an interrupt occurred while waiting on a tracee. */ Task* get_next_thread(Task* t, Switchable switchable, bool* by_waitpid); /** * Set the priority of |t| to |value| and update related * state. */ void update_task_priority(Task* t, int value); /** * Do one round of round-robin scheduling if we're not already doing one. * If we start round-robin scheduling now, make last_task the last * task to be scheduled. * If the task_round_robin_queue is empty this moves all tasks into it, * putting last_task last. */ void schedule_one_round_robin(Task* last_task); void on_create(Task* t); /** * De-register a thread. This function should be called when a thread exits. */ void on_destroy(Task* t); private: // Tasks sorted by priority. 
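  // Ordering pairs as (priority, Task*) makes one std::set answer two
  // queries: all tasks sharing a priority form a contiguous band, and the
  // bands appear in priority order. A sketch of scanning one band, mirroring
  // get_next_task_with_same_priority(); visit() is a hypothetical callback:
#if 0
  // (priority, nullptr) sorts before every real (priority, Task*) pair.
  auto it =
      task_priority_set.lower_bound(std::make_pair(priority, (Task*)nullptr));
  for (; it != task_priority_set.end() && it->first == priority; ++it) {
    visit(it->second);
  }
#endif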
  typedef std::set<std::pair<int, Task*> > TaskPrioritySet;
  typedef std::deque<Task*> TaskQueue;

  /**
   * Pull a task from the round-robin queue if available. Otherwise,
   * find the highest-priority task that is runnable. If the highest-priority
   * runnable task has the same priority as 'current', return 'current' or
   * the next runnable task after 'current' in round-robin order.
   * Sets 'by_waitpid' to true if we determined the task was runnable by
   * calling waitpid on it and observing a state change.
   */
  Task* find_next_runnable_task(bool* by_waitpid);
  /**
   * Returns the first task in the round-robin queue or null if it's empty.
   */
  Task* get_next_round_robin_task();
  /**
   * Removes a task from the front of the round-robin queue.
   */
  void remove_round_robin_task();
  Task* get_next_task_with_same_priority(Task* t);

  RecordSession& session;

  /**
   * Every task of this session is either in task_priority_set
   * (when in_round_robin_queue is false), or in task_round_robin_queue
   * (when in_round_robin_queue is true).
   *
   * task_priority_set is a set of pairs of (task->priority, task). This
   * lets us efficiently iterate over the tasks with a given priority, or
   * all tasks in priority order.
   */
  TaskPrioritySet task_priority_set;
  TaskQueue task_round_robin_queue;

  /**
   * The currently scheduled task. This may be nullptr if the last scheduled
   * task has been destroyed.
   */
  Task* current;

  Ticks max_ticks_;
  TraceFrame::Time max_events;
};

#endif /* RR_REC_SCHED_H_ */
rr-4.1.0/src/ScopedFd.h000066400000000000000000000016441265436462100146040ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#ifndef RR_SCOPED_FD_H_
#define RR_SCOPED_FD_H_

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

/**
 * RAII helper to open a file and then close the fd when the helper
 * goes out of scope.
 */
class ScopedFd {
public:
  ScopedFd() : fd(-1) {}
  ScopedFd(int fd) : fd(fd) {}
  ScopedFd(const char* pathname, int flags, mode_t mode = 0)
      : fd(open(pathname, flags, mode)) {}
  ScopedFd(ScopedFd&& other) : fd(other.fd) { other.fd = -1; }
  ~ScopedFd() { close(); }

  ScopedFd& operator=(ScopedFd&& other) {
    fd = other.fd;
    other.fd = -1;
    return *this;
  }

  operator int() const { return get(); }
  int get() const { return fd; }

  bool is_open() { return fd >= 0; }
  void close() {
    if (fd >= 0) {
      ::close(fd);
    }
    fd = -1;
  }

private:
  int fd;
};

#endif // RR_SCOPED_FD_H
rr-4.1.0/src/SeccompFilterRewriter.cc000066400000000000000000000066601265436462100175410ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#include "SeccompFilterRewriter.h"

#include <errno.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

#include "AddressSpace.h"
#include "AutoRemoteSyscalls.h"
#include "kernel_abi.h"
#include "log.h"
#include "Registers.h"
#include "seccomp-bpf.h"
#include "task.h"

using namespace std;

static void set_syscall_result(Task* t, long ret) {
  Registers r = t->regs();
  r.set_syscall_result(ret);
  t->set_regs(r);
}

template <typename Arch>
static void install_patched_seccomp_filter_arch(
    Task* t, unordered_map<uint32_t, uint16_t>& result_to_index,
    vector<uint32_t>& index_to_result) {
  // Take advantage of the fact that the filter program is arg3() in both
  // prctl and seccomp syscalls.
  bool ok = true;
  auto prog = t->read_mem(
      remote_ptr<typename Arch::sock_fprog>(t->regs().arg3()), &ok);
  if (!ok) {
    set_syscall_result(t, -EFAULT);
    return;
  }
  auto code = t->read_mem(prog.filter.rptr(), prog.len, &ok);
  if (!ok) {
    set_syscall_result(t, -EFAULT);
    return;
  }
  // Convert all returns to TRACE returns so that rr can handle them.
  // See handle_ptrace_event in RecordSession.
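  // (The loop below assigns each distinct return constant k a small index i
  // and rewrites the return to SECCOMP_RET_TRACE | i. At the resulting
  // PTRACE_EVENT_SECCOMP stop, the tracer recovers k from the low 16 data
  // bits of the event message. A sketch of that recovery side; tracee_tid
  // and rewriter are hypothetical names:)
#if 0
  unsigned long msg;
  ptrace(PTRACE_GETEVENTMSG, tracee_tid, nullptr, &msg);
  uint16_t index = msg & SECCOMP_RET_DATA; // the i our rewritten filter chose
  uint32_t original_result = rewriter.map_filter_data_to_real_result(index);
#endif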
for (auto& u : code) { if (BPF_CLASS(u.code) == BPF_RET) { ASSERT(t, BPF_RVAL(u.code) == BPF_K) << "seccomp-bpf program uses BPF_RET with A/X register, not " "supported"; if (u.k != SECCOMP_RET_ALLOW) { if (result_to_index.find(u.k) == result_to_index.end()) { ASSERT(t, index_to_result.size() < SECCOMP_RET_DATA) << "Too many distinct constants used in seccomp-bpf programs"; result_to_index[u.k] = index_to_result.size(); index_to_result.push_back(u.k); } u.k = result_to_index[u.k] | SECCOMP_RET_TRACE; } } } uintptr_t privileged_in_untraced_syscall_ip = AddressSpace::rr_page_ip_in_privileged_untraced_syscall() .register_value(); uintptr_t privileged_in_traced_syscall_ip = AddressSpace::rr_page_ip_in_privileged_traced_syscall().register_value(); assert(privileged_in_untraced_syscall_ip == uint32_t(privileged_in_untraced_syscall_ip)); assert(privileged_in_traced_syscall_ip == uint32_t(privileged_in_traced_syscall_ip)); static const typename Arch::sock_filter prefix[] = { ALLOW_SYSCALLS_FROM_CALLSITE(uint32_t(privileged_in_untraced_syscall_ip)), ALLOW_SYSCALLS_FROM_CALLSITE(uint32_t(privileged_in_traced_syscall_ip)) }; code.insert(code.begin(), prefix, prefix + array_length(prefix)); long ret; { AutoRemoteSyscalls remote(t); AutoRestoreMem mem(remote, nullptr, sizeof(prog) + code.size() * sizeof(typename Arch::sock_filter)); auto code_ptr = mem.get().cast(); t->write_mem(code_ptr, code.data(), code.size()); prog.len = code.size(); prog.filter = code_ptr; auto prog_ptr = remote_ptr(code_ptr + code.size()) .cast(); t->write_mem(prog_ptr, prog); ret = remote.syscall(t->regs().original_syscallno(), t->regs().arg1(), t->regs().arg2(), prog_ptr); } set_syscall_result(t, ret); } void SeccompFilterRewriter::install_patched_seccomp_filter(Task* t) { RR_ARCH_FUNCTION(install_patched_seccomp_filter_arch, t->arch(), t, result_to_index, index_to_result); } rr-4.1.0/src/SeccompFilterRewriter.h000066400000000000000000000025311265436462100173740ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_SECCOMP_FILTER_REWRITER_H_ #define RR_SECCOMP_FILTER_REWRITER_H_ #include #include #include #include class Task; /** * Object to support install_patched_seccomp_filter. */ class SeccompFilterRewriter { public: /** * Assuming |t| is set up for a prctl or seccomp syscall that * installs a seccomp-bpf filter, patch the filter to signal the tracer * instead of silently delivering an errno, and install it. */ void install_patched_seccomp_filter(Task* t); uint32_t map_filter_data_to_real_result(uint16_t value) { assert(value < index_to_result.size()); return index_to_result[value]; } private: /** * Seccomp filters can return 32-bit result values. We need to map all of * them into a single 16 bit data field. Fortunately (so far) all the * filters we've seen return constants, so there aren't too many distinct * values we need to deal with. For each constant value that gets returned, * we'll add it as the key in |result_map|, with the corresponding value * being the 16-bit data value that our rewritten filter returns. 
*/ std::unordered_map result_to_index; std::vector index_to_result; }; #endif // RR_SECCOMP_FILTER_REWRITER_H_ rr-4.1.0/src/Session.cc000066400000000000000000000375261265436462100147060ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "Session" #include "Session.h" #include #include #include #include "rr/rr.h" #include "AutoRemoteSyscalls.h" #include "EmuFs.h" #include "kernel_metadata.h" #include "log.h" #include "task.h" #include "util.h" using namespace rr; using namespace std; struct Session::CloneCompletion { struct TaskGroup { Task* clone_leader; Task::CapturedState clone_leader_state; vector member_states; }; vector task_groups; }; Session::Session() : next_task_serial_(1), tracees_consistent(false), visible_execution_(true) { LOG(debug) << "Session " << this << " created"; } Session::~Session() { kill_all_tasks(); LOG(debug) << "Session " << this << " destroyed"; for (auto tg : task_group_map) { tg.second->forget_session(); } } Session::Session(const Session& other) { statistics_ = other.statistics_; next_task_serial_ = other.next_task_serial_; tracees_consistent = other.tracees_consistent; visible_execution_ = other.visible_execution_; } void Session::on_create(TaskGroup* tg) { task_group_map[tg->tguid()] = tg; } void Session::on_destroy(TaskGroup* tg) { task_group_map.erase(tg->tguid()); } void Session::post_exec() { assert_fully_initialized(); if (tracees_consistent) { return; } tracees_consistent = true; assert(tasks().size() == 1); tasks().begin()->second->flush_inconsistent_state(); } AddressSpace::shr_ptr Session::create_vm(Task* t, const std::string& exe, uint32_t exec_count) { assert_fully_initialized(); AddressSpace::shr_ptr as(new AddressSpace(t, exe, exec_count)); as->insert_task(t); vm_map[as->uid()] = as.get(); return as; } AddressSpace::shr_ptr Session::clone(Task* t, AddressSpace::shr_ptr vm) { assert_fully_initialized(); // If vm already belongs to our session this is a fork, otherwise it's // a session-clone AddressSpace::shr_ptr as; if (this == vm->session()) { as = AddressSpace::shr_ptr( new AddressSpace(this, *vm, t->rec_tid, t->tuid().serial(), 0)); } else { as = AddressSpace::shr_ptr(new AddressSpace(this, *vm, vm->uid().tid(), vm->uid().serial(), vm->uid().exec_count())); } vm_map[as->uid()] = as.get(); return as; } TaskGroup::shr_ptr Session::create_tg(Task* t) { TaskGroup::shr_ptr tg( new TaskGroup(this, nullptr, t->rec_tid, t->tid, t->tuid().serial())); tg->insert_task(t); return tg; } TaskGroup::shr_ptr Session::clone(Task* t, TaskGroup::shr_ptr tg) { assert_fully_initialized(); // If tg already belongs to our session this is a fork to create a new // taskgroup, otherwise it's a session-clone of an existing taskgroup if (this == tg->session()) { return TaskGroup::shr_ptr( new TaskGroup(this, tg.get(), t->rec_tid, t->tid, t->tuid().serial())); } TaskGroup* parent = tg->parent() ? 
find_task_group(tg->parent()->tguid()) : nullptr; return TaskGroup::shr_ptr( new TaskGroup(this, parent, tg->tgid, t->tid, tg->tguid().serial())); } vector Session::vms() const { vector result; for (auto& vm : vm_map) { result.push_back(vm.second); } return result; } Task* Session::clone(Task* p, int flags, remote_ptr stack, remote_ptr tls, remote_ptr cleartid_addr, pid_t new_tid, pid_t new_rec_tid) { assert_fully_initialized(); Task* c = p->clone(flags, stack, tls, cleartid_addr, new_tid, new_rec_tid, next_task_serial()); on_create(c); return c; } Task* Session::find_task(pid_t rec_tid) const { finish_initializing(); auto it = tasks().find(rec_tid); return tasks().end() != it ? it->second : nullptr; } Task* Session::find_task(const TaskUid& tuid) const { Task* t = find_task(tuid.tid()); return t && t->tuid() == tuid ? t : nullptr; } TaskGroup* Session::find_task_group(const TaskGroupUid& tguid) const { finish_initializing(); auto it = task_group_map.find(tguid); if (task_group_map.end() == it) { return nullptr; } return it->second; } AddressSpace* Session::find_address_space(const AddressSpaceUid& vmuid) const { finish_initializing(); auto it = vm_map.find(vmuid); if (vm_map.end() == it) { return nullptr; } return it->second; } void Session::kill_all_tasks() { for (auto& v : task_map) { Task* t = v.second; if (!t->is_stopped) { // During recording we might be aborting the recording, in which case // one or more tasks might not be stopped. We haven't got any really // good options here so we'll just skip detaching and try killing // it with SIGKILL below. rr will usually exit immediatley after this // so the likelihood that we'll leak a zombie task isn't too bad. continue; } if (!t->stable_exit) { /* * Prepare to forcibly kill this task by detaching it first. To ensure * the task doesn't continue executing, we first set its ip() to an * invalid value. We need to do this for all tasks in the Session before * kill() is guaranteed to work properly. SIGKILL on ptrace-attached tasks * seems to not work very well, and after sending SIGKILL we can't seem to * reliably detach. */ LOG(debug) << "safely detaching from " << t->tid << " ..."; // Detaching from the process lets it continue. We don't want a replaying // process to perform syscalls or do anything else observable before we // get around to SIGKILLing it. So we move its ip() to an address // which will cause it to do an exit() syscall if it runs at all. // We used to set this to an invalid address, but that causes a SIGSEGV // to be raised which can cause core dumps after we detach from ptrace. // Making the process undumpable with PR_SET_DUMPABLE turned out not to // be practical because that has a side effect of triggering various // security measures blocking inspection of the process (PTRACE_ATTACH, // access to /proc//fd). // Disabling dumps via setrlimit(RLIMIT_CORE, 0) doesn't stop dumps // if /proc/sys/kernel/core_pattern is set to pipe the core to a process // (e.g. to systemd-coredump). // We also tried setting ip() to an address that does an infinite loop, // but that leaves a runaway process if something happens to kill rr // after detaching but before we get a chance to SIGKILL the tracee. Registers r = t->regs(); r.set_ip(t->vm()->privileged_traced_syscall_ip()); r.set_syscallno(syscall_number_for_exit(r.arch())); r.set_arg1(0); t->set_regs(r); long result; do { // We have observed this failing with an ESRCH when the thread clearly // still exists and is ptraced. Retrying the PTRACE_DETACH seems to // work around it. 
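// (Summarizing the protocol this function implements: phase 1 parks each
// stopped tracee on an exit(0) syscall and detaches, as above; phase 2, in
// the loop that follows, SIGKILLs whatever could not exit stably. The
// park-and-detach half, condensed; this restates the surrounding code rather
// than adding to it:)
#if 0
Registers r = t->regs();
r.set_ip(t->vm()->privileged_traced_syscall_ip());  // a known syscall insn
r.set_syscallno(syscall_number_for_exit(r.arch())); // turn it into exit(0)
r.set_arg1(0);
t->set_regs(r);
while (t->fallible_ptrace(PTRACE_DETACH, nullptr, nullptr) < 0) {
  // retry: PTRACE_DETACH occasionally fails with a spurious ESRCH
}
#endif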
result = t->fallible_ptrace(PTRACE_DETACH, nullptr, nullptr); ASSERT(t, result >= 0 || errno == ESRCH); } while (result < 0); } } while (!task_map.empty()) { Task* t = task_map.rbegin()->second; if (!t->stable_exit && !t->unstable) { /** * Destroy the OS task backing this by sending it SIGKILL and * ensuring it was delivered. After |kill()|, the only * meaningful thing that can be done with this task is to * delete it. */ LOG(debug) << "sending SIGKILL to " << t->tid << " ..."; // If we haven't already done a stable exit via syscall, // kill the task and note that the entire task group is unstable. // The task may already have exited due to the preparation above, // so we might accidentally shoot down the wrong task :-(, but we // have to do this because the task might be in a state where it's not // going to run and exit by itself. // Linux doesn't seem to give us a reliable way to detach and kill // the tracee without races. syscall(SYS_tgkill, t->real_tgid(), t->tid, SIGKILL); t->task_group()->destabilize(); } delete t; } } void Session::on_destroy(AddressSpace* vm) { assert(vm->task_set().size() == 0); assert(vm_map.count(vm->uid()) == 1); vm_map.erase(vm->uid()); } void Session::on_destroy(Task* t) { assert(task_map.count(t->rec_tid) == 1); task_map.erase(t->rec_tid); } void Session::on_create(Task* t) { task_map[t->rec_tid] = t; } BreakStatus Session::diagnose_debugger_trap(Task* t) { assert_fully_initialized(); BreakStatus break_status; break_status.task = t; TrapType pending_bp = t->vm()->get_breakpoint_type_at_addr(t->ip()); TrapType retired_bp = t->vm()->get_breakpoint_type_for_retired_insn(t->ip()); uintptr_t debug_status = t->consume_debug_status(); // NBB: very little effort has been made to handle // corner cases where multiple // breakpoints/watchpoints/singlesteps are fired // simultaneously. These cases will be addressed as // they arise in practice. int stop_sig = t->pending_sig(); if (SIGTRAP != stop_sig) { if (TRAP_BKPT_USER == pending_bp) { // A signal was raised /just/ before a trap // instruction for a SW breakpoint. This is // observed when debuggers write trap // instructions into no-exec memory, for // example the stack. // // We report the breakpoint before any signal // that might have been raised in order to let // the debugger do something at the breakpoint // insn; possibly clearing the breakpoint and // changing the $ip. Otherwise, we expect the // debugger to clear the breakpoint and resume // execution, which should raise the original // signal again. LOG(debug) << "hit debugger breakpoint BEFORE ip " << t->ip() << " for " << signal_name(stop_sig); #ifdef DEBUGTAG siginfo_t si = t->get_siginfo(); psiginfo(&si, " siginfo for signal-stop:\n "); #endif break_status.breakpoint_hit = true; } else if (stop_sig && stop_sig != PerfCounters::TIME_SLICE_SIGNAL) { break_status.signal = stop_sig; } } else if (TRAP_BKPT_USER == retired_bp) { LOG(debug) << "hit debugger breakpoint at ip " << t->ip(); // SW breakpoint: $ip is just past the // breakpoint instruction. Move $ip back // right before it. t->move_ip_before_breakpoint(); break_status.breakpoint_hit = true; } else if (DS_SINGLESTEP & debug_status) { LOG(debug) << " finished debugger stepi"; break_status.singlestep_complete = true; } // In VMWare Player 6.0.4 build-2249910, 32-bit Ubuntu x86 guest, // single-stepping does not trigger watchpoints :-(. 
We work around // that here by calling notify_watchpoint_fired if there's a singlestep // but no watchpoints reported; write-watchpoints will detect that their // value has changed and trigger. Read/exec watchpoints can't be detected // this way so they're still broken :-(. if ((DS_WATCHPOINT_ANY | DS_SINGLESTEP) & debug_status) { LOG(debug) << " " << t->tid << "(rec:" << t->rec_tid << "): hit debugger watchpoint."; t->vm()->notify_watchpoint_fired(debug_status); } check_for_watchpoint_changes(t, break_status); return break_status; } void Session::check_for_watchpoint_changes(Task* t, BreakStatus& break_status) { assert_fully_initialized(); break_status.watchpoints_hit = t->vm()->consume_watchpoint_changes(); } void Session::assert_fully_initialized() const { assert(!clone_completion && "Session not fully initialized"); } void Session::finish_initializing() const { if (!clone_completion) { return; } Session* self = const_cast(this); for (auto& tgleader : clone_completion->task_groups) { AutoRemoteSyscalls remote(tgleader.clone_leader); for (auto& tgmember : tgleader.member_states) { Task* t_clone = Task::os_clone_into(tgmember, tgleader.clone_leader, remote); self->on_create(t_clone); t_clone->copy_state(tgmember); } tgleader.clone_leader->copy_state(tgleader.clone_leader_state); } self->clone_completion = nullptr; } static void remap_shared_mmap(AutoRemoteSyscalls& remote, EmuFs& dest_emu_fs, const AddressSpace::Mapping& m_in_mem) { AddressSpace::Mapping m = m_in_mem; LOG(debug) << " remapping shared region at " << m.map.start() << "-" << m.map.end(); remote.infallible_syscall(syscall_number_for_munmap(remote.arch()), m.map.start(), m.map.size()); auto emufile = dest_emu_fs.at(m.recorded_map); // TODO: this duplicates some code in replay_syscall.cc, but // it's somewhat nontrivial to factor that code out. int remote_fd; { string path = emufile->proc_path(); AutoRestoreMem child_path(remote, path.c_str()); // Always open the emufs file O_RDWR, even if the current mapping prot // is read-only. We might mprotect it to read-write later. // skip leading '/' since we want the path to be relative to the root fd remote_fd = remote.infallible_syscall( syscall_number_for_openat(remote.arch()), RR_RESERVED_ROOT_DIR_FD, child_path.get() + 1, O_RDWR); if (0 > remote_fd) { FATAL() << "Couldn't open " << path << " in tracee"; } } struct stat real_file = remote.task()->stat_fd(remote_fd); string real_file_name = remote.task()->file_name_of_fd(remote_fd); // XXX this condition is x86/x64-specific, I imagine. remote.infallible_mmap_syscall(m.map.start(), m.map.size(), m.map.prot(), // The remapped segment *must* be // remapped at the same address, // or else many things will go // haywire. (m.map.flags() & ~MAP_ANONYMOUS) | MAP_FIXED, remote_fd, m.map.file_offset_bytes() / page_size()); // We update the AddressSpace mapping too, since that tracks the real file // name and we need to update that. remote.task()->vm()->map(m.map.start(), m.map.size(), m.map.prot(), m.map.flags(), m.map.file_offset_bytes(), real_file_name, real_file.st_dev, real_file.st_ino, &m.recorded_map); remote.infallible_syscall(syscall_number_for_close(remote.arch()), remote_fd); } void Session::copy_state_to(Session& dest, EmuFs& dest_emu_fs) { assert_fully_initialized(); assert(!dest.clone_completion); auto completion = unique_ptr(new CloneCompletion()); for (auto vm : vm_map) { // Pick an arbitrary task to be group leader. The actual group leader // might have died already. 
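// (Overview: each iteration of the loop below forks one "clone leader"
// into |dest|, remaps its MAP_SHARED mappings onto |dest_emu_fs|'s files
// via remap_shared_mmap() above, and captures the remaining group
// members' states; finish_initializing() later re-creates them via
// Task::os_clone_into().)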
Task* group_leader = *vm.second->task_set().begin(); LOG(debug) << " forking tg " << group_leader->tgid() << " (real: " << group_leader->real_tgid() << ")"; completion->task_groups.push_back(CloneCompletion::TaskGroup()); auto& group = completion->task_groups.back(); group.clone_leader = group_leader->os_fork_into(&dest); dest.on_create(group.clone_leader); LOG(debug) << " forked new group leader " << group.clone_leader->tid; { AutoRemoteSyscalls remote(group.clone_leader); for (auto m : group.clone_leader->vm()->maps()) { if ((m.recorded_map.flags() & MAP_SHARED) && dest_emu_fs.has_file_for(m.recorded_map)) { remap_shared_mmap(remote, dest_emu_fs, m); } } for (auto t : group_leader->task_group()->task_set()) { if (group_leader == t) { continue; } LOG(debug) << " cloning " << t->rec_tid; group.member_states.push_back(t->capture_state()); } } group.clone_leader_state = group_leader->capture_state(); } dest.clone_completion = move(completion); assert(dest.vms().size() > 0); } rr-4.1.0/src/Session.h000066400000000000000000000200601265436462100145310ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_SESSION_H_ #define RR_SESSION_H_ #include #include #include #include #include #include #include "AddressSpace.h" #include "TaskishUid.h" #include "TraceStream.h" class AddressSpace; class DiversionSession; class EmuFs; class RecordSession; class ReplaySession; class Task; class TaskGroup; // The following types are used by step() APIs in Session subclasses. /** * In general, multiple break reasons can apply simultaneously. */ struct BreakStatus { BreakStatus() : task(nullptr), signal(0), breakpoint_hit(false), singlestep_complete(false), approaching_ticks_target(false), task_exit(false) {} // The triggering Task. This may be different from session->current_task() // when replay switches to a new task when ReplaySession::replay_step() ends. Task* task; // List of watchpoints hit; any watchpoint hit causes a stop after the // instruction that triggered the watchpoint has completed. std::vector watchpoints_hit; // When nonzero, we stopped because a signal was delivered to |task|. int signal; // True when we stopped because we hit a breakpoint at |task|'s current // ip(). bool breakpoint_hit; // True when we stopped because a singlestep completed in |task|. bool singlestep_complete; // True when we stopped because we got too close to the specified ticks // target. bool approaching_ticks_target; // True when we stopped because |task| is about to exit. bool task_exit; bool any_break() { return !watchpoints_hit.empty() || signal || breakpoint_hit || singlestep_complete || approaching_ticks_target; } }; enum RunCommand { // Continue until we hit a breakpoint or a new replay event RUN_CONTINUE, // Execute a single instruction (unless at a breakpoint or a replay event) RUN_SINGLESTEP, // Like RUN_SINGLESTEP, but a single-instruction loop is allowed (but not // required) to execute multiple times if we don't reach a different // instruction. Usable with ReplaySession::replay_step only. RUN_SINGLESTEP_FAST_FORWARD }; /** * Sessions track the global state of a set of tracees corresponding * to an rr recorder or replayer. During recording, the tracked * tracees will all write to the same TraceWriter, and during * replay, the tracees that will be tracked will all be created based * on the same TraceReader. * * Multiple sessions can coexist in the same process. This * is required when using replay checkpoints, for example. 
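 *
 * An illustrative lookup sketch (hypothetical usage, not from the
 * original sources; |rec_tid| is assumed to be a recorded tid):
 *
 *   if (Task* t = session.find_task(rec_tid)) {
 *     // find_task(const TaskUid&) re-checks the serial, so a recycled
 *     // tid can't be mistaken for its predecessor.
 *     assert(session.find_task(t->tuid()) == t);
 *   }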
*/ class Session { friend class ReplaySession; public: // AddressSpaces and TaskGroups are indexed by their first task's TaskUid // (effectively), so that if the first task dies and its tid is recycled, // we don't get confused. TaskMap is indexed by tid since there can never be // two Tasks with the same tid at the same time. typedef std::map<AddressSpaceUid, AddressSpace*> AddressSpaceMap; typedef std::map<pid_t, Task*> TaskMap; typedef std::map<TaskGroupUid, TaskGroup*> TaskGroupMap; /** * Call |post_exec()| immediately after a tracee has successfully * |execve()|'d. After that, |can_validate()| returns true. * This is called while we're still in the execve syscall so it's not safe * to perform remote syscalls in this method. * * Tracee state can't be validated before the first exec, * because the address space inside the rr process for |rr * replay| will be different than it was for |rr record|. * After the first exec, we're running tracee code, and * everything must be the same. */ void post_exec(); bool can_validate() const { return tracees_consistent; } /** * Create and return a new address space that's constructed * from |t|'s actual OS address space. */ std::shared_ptr<AddressSpace> create_vm(Task* t, const std::string& exe, uint32_t exec_count = 0); /** * Return a copy of |vm| with the same mappings. If any * mapping is changed, only the |clone()|d copy is updated, * not its origin (i.e. copy-on-write semantics). */ std::shared_ptr<AddressSpace> clone(Task* t, std::shared_ptr<AddressSpace> vm); std::shared_ptr<TaskGroup> create_tg(Task* t); /** * Return a copy of |tg| with the same mappings. */ std::shared_ptr<TaskGroup> clone(Task* t, std::shared_ptr<TaskGroup> tg); /** See Task::clone(). */ Task* clone(Task* p, int flags, remote_ptr<void> stack, remote_ptr<void> tls, remote_ptr<int> cleartid_addr, pid_t new_tid, pid_t new_rec_tid = -1); uint32_t next_task_serial() { return next_task_serial_++; } /** * Return the task created with |rec_tid|, or nullptr if no such * task exists. */ Task* find_task(pid_t rec_tid) const; Task* find_task(const TaskUid& tuid) const; /** * Return the task group whose unique ID is |tguid|, or nullptr if no such * task group exists. */ TaskGroup* find_task_group(const TaskGroupUid& tguid) const; /** * Return the AddressSpace whose unique ID is |vmuid|, or nullptr if no such * address space exists. */ AddressSpace* find_address_space(const AddressSpaceUid& vmuid) const; /** * |tasks().size()| will be zero and all the OS tasks will be * gone when this returns, or this won't return. */ void kill_all_tasks(); /** * Call these functions from the objects' destructors in order * to notify this session that the objects are dying. */ void on_destroy(AddressSpace* vm); virtual void on_destroy(Task* t); void on_create(TaskGroup* tg); void on_destroy(TaskGroup* tg); /** Return the set of Tasks being tracked in this session. */ const TaskMap& tasks() const { finish_initializing(); return task_map; } /** * Return the set of AddressSpaces being tracked in this session.
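 * (Returned by value: the vector is rebuilt from vm_map on each call, so
 * mutating the result does not affect the session.)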
*/ std::vector vms() const; virtual RecordSession* as_record() { return nullptr; } virtual ReplaySession* as_replay() { return nullptr; } virtual DiversionSession* as_diversion() { return nullptr; } bool is_recording() { return as_record() != nullptr; } bool is_replaying() { return as_replay() != nullptr; } bool is_diversion() { return as_diversion() != nullptr; } bool visible_execution() const { return visible_execution_; } void set_visible_execution(bool visible) { visible_execution_ = visible; } struct Statistics { Statistics() : bytes_written(0), ticks_processed(0), syscalls_performed(0) {} uint64_t bytes_written; Ticks ticks_processed; uint32_t syscalls_performed; }; void accumulate_bytes_written(uint64_t bytes_written) { statistics_.bytes_written += bytes_written; } void accumulate_syscall_performed() { statistics_.syscalls_performed += 1; } void accumulate_ticks_processed(Ticks ticks) { statistics_.ticks_processed += ticks; } Statistics statistics() { return statistics_; } protected: Session(); virtual ~Session(); Session(const Session& other); Session& operator=(const Session&) = delete; virtual void on_create(Task* t); BreakStatus diagnose_debugger_trap(Task* t); void check_for_watchpoint_changes(Task* t, BreakStatus& break_status); void copy_state_to(Session& dest, EmuFs& dest_emu_fs); struct CloneCompletion; // Call this before doing anything that requires access to the full set // of tasks (i.e., almost anything!). Not really const! void finish_initializing() const; void assert_fully_initialized() const; AddressSpaceMap vm_map; TaskMap task_map; TaskGroupMap task_group_map; // If non-null, data required to finish initializing the tasks of this // session. std::unique_ptr clone_completion; Statistics statistics_; uint32_t next_task_serial_; /** * True if we've done an exec so tracees are now in a state that will be * consistent across record and replay. */ bool tracees_consistent; /** * True while the execution of this session is visible to users. */ bool visible_execution_; }; #endif // RR_SESSION_H_ rr-4.1.0/src/StdioMonitor.cc000066400000000000000000000020601265436462100156760ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "StdioMonitor.h" #include "Flags.h" #include "log.h" #include "ReplaySession.h" #include "Session.h" #include "task.h" Switchable StdioMonitor::will_write(Task* t) { if (Flags::get().mark_stdio && t->session().visible_execution()) { char buf[256]; snprintf(buf, sizeof(buf) - 1, "[rr %d %d]", t->tgid(), t->trace_time()); ssize_t len = strlen(buf); if (write(original_fd, buf, len) != len) { ASSERT(t, false) << "Couldn't write to " << original_fd; } } return PREVENT_SWITCH; } void StdioMonitor::did_write(Task* t, const std::vector& ranges) { if (t->session().is_replaying() && t->replay_session().redirect_stdio() && t->session().visible_execution()) { for (auto& r : ranges) { auto bytes = t->read_mem(r.data.cast(), r.length); if (bytes.size() != (size_t)write(original_fd, bytes.data(), bytes.size())) { ASSERT(t, false) << "Couldn't write to " << original_fd; } } } } rr-4.1.0/src/StdioMonitor.h000066400000000000000000000027521265436462100155500ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_STDIO_MONITOR_H_ #define RR_STDIO_MONITOR_H_ #include "FileMonitor.h" /** * A FileMonitor to track writes to rr's stdout/stderr fds. * StdioMonitor prevents syscallbuf from buffering output to those fds. 
It * adds the optional stdio markers. During replay, it echoes stdio writes. */ class StdioMonitor : public FileMonitor { public: /** * Create a StdioMonitor that monitors writes to rr's original_fd * (STDOUT_FILENO or STDERR_FILENO). * Note that it's possible for a tracee to have a StdioMonitor associated * with a different fd, thanks to dup() etc. */ StdioMonitor(int original_fd) : original_fd(original_fd) {} /** * Make writes to stdout/stderr blocking, to avoid nondeterminism in the * order in which the kernel actually performs such writes. * This theoretically introduces the possibility of deadlock between rr's * tracee and some external program reading rr's output * via a pipe ... but that seems unlikely to bite in practice. * * Also, if stdio-marking is enabled, prepend the stdio write with * "[rr <pid> <event>]". This allows users to more easily correlate * stdio with trace event numbers. */ virtual Switchable will_write(Task* t); /** * During replay, echo writes to stdout/stderr. */ virtual void did_write(Task* t, const std::vector<Range>& ranges); private: int original_fd; }; #endif /* RR_STDIO_MONITOR_H_ */ rr-4.1.0/src/StringVectorToCharArray.h000066400000000000000000000011501265436462100176360ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_STRING_VECTOR_TO_CHAR_ARRAY_H_ #define RR_STRING_VECTOR_TO_CHAR_ARRAY_H_ #include <string> #include <vector> /** * Converts a vector of strings to a POSIX-style array of char*s terminated * by a nullptr. */ class StringVectorToCharArray { public: StringVectorToCharArray(const std::vector<std::string>& vs) { for (auto& v : vs) { array.push_back(const_cast<char*>(v.c_str())); } array.push_back(nullptr); } char** get() { return array.data(); } private: std::vector<char*> array; }; #endif rr-4.1.0/src/TaskishUid.h000066400000000000000000000041561265436462100151660ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_TASKISH_UID_H_ #define RR_TASKISH_UID_H_ #include <stdint.h> #include <sys/types.h> class AddressSpace; class Task; class TaskGroup; /** * An ID that's unique within a Session (but consistent across * multiple ReplaySessions for the same trace), used by Tasks, TaskGroups * and AddressSpaces. * This is needed because tids can be recycled during a long-running session.
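 *
 * Illustrative example (hypothetical, not from the original sources):
 * two TaskUids sharing a tid but carrying different serials compare
 * unequal, which is exactly what distinguishes a recycled tid from its
 * predecessor:
 *
 *   TaskUid a(1000, 1);
 *   TaskUid b(1000, 2);
 *   assert(a != b && a < b);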
*/ template class TaskishUid { public: TaskishUid() : tid_(0), serial_(0) {} TaskishUid(pid_t tid, uint32_t serial) : tid_(tid), serial_(serial) {} TaskishUid(const TaskishUid& other) = default; bool operator==(const TaskishUid& other) const { return tid_ == other.tid_ && serial_ == other.serial_; } bool operator!=(const TaskishUid& other) const { return !(*this == other); } bool operator<(const TaskishUid& other) const { if (tid_ < other.tid_) { return true; } if (tid_ > other.tid_) { return false; } return serial_ < other.serial_; } pid_t tid() const { return tid_; } uint32_t serial() const { return serial_; } private: pid_t tid_; uint32_t serial_; }; typedef TaskishUid TaskUid; typedef TaskishUid TaskGroupUid; class AddressSpaceUid : public TaskishUid { public: AddressSpaceUid() : exec_count_(0) {} AddressSpaceUid(pid_t tid, uint32_t serial, uint32_t exec_count) : TaskishUid(tid, serial), exec_count_(exec_count) {} AddressSpaceUid(const AddressSpaceUid& other) = default; bool operator==(const AddressSpaceUid& other) const { return TaskishUid::operator==(other) && exec_count_ == other.exec_count_; } bool operator<(const AddressSpaceUid& other) const { if (TaskishUid::operator<(other)) { return true; } if (other.TaskishUid::operator<(*this)) { return false; } return exec_count_ < other.exec_count_; } uint32_t exec_count() const { return exec_count_; } private: uint32_t exec_count_; }; #endif // RR_TASKISH_UID_H_ rr-4.1.0/src/Ticks.h000066400000000000000000000003011265436462100141570ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_TICKS_H_ #define RR_TICKS_H_ #include typedef int64_t Ticks; #endif /* RR_TICKS_H_ */ rr-4.1.0/src/TraceFrame.cc000066400000000000000000000034031265436462100152570ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "TraceFrame" #include "TraceFrame.h" #include #include void TraceFrame::set_exec_info(const Registers& regs, const PerfCounters::Extra* extra_perf_values, const ExtraRegisters* extra_regs) { assert(event().record_exec_info() == HAS_EXEC_INFO); recorded_regs = regs; if (extra_perf_values) { extra_perf = *extra_perf_values; } if (extra_regs) { recorded_extra_regs = *extra_regs; } } void TraceFrame::dump(FILE* out) const { out = out ? out : stdout; fprintf(out, "{\n global_time:%u, event:`%s' ", time(), event().str().c_str()); if (event().is_syscall_event()) { fprintf(out, "(state:%s) ", state_name(event().Syscall().state)); } fprintf(out, "tid:%d, ticks:%" PRId64 "\n", tid(), ticks()); if (event().has_exec_info() != HAS_EXEC_INFO) { return; } if (PerfCounters::extra_perf_counters_enabled()) { fprintf(out, " hw_ints:%" PRId64 " faults:%" PRId64 " insns:%" PRId64 "\n", extra_perf.hw_interrupts, extra_perf.page_faults, extra_perf.instructions_retired); } regs().print_register_file_for_trace(out); fprintf(out, "\n"); } void TraceFrame::dump_raw(FILE* out) const { out = out ? 
out : stdout; fprintf(out, " %d %d %d %" PRId64, time(), tid(), event().encode().encoded, ticks()); if (event().has_exec_info() != HAS_EXEC_INFO) { fprintf(out, "\n"); return; } fprintf(out, " %" PRId64 " %" PRId64 " %" PRId64, extra_perf.hw_interrupts, extra_perf.page_faults, extra_perf.instructions_retired); regs().print_register_file_for_trace_raw(out); fprintf(out, "\n"); } rr-4.1.0/src/TraceFrame.h000066400000000000000000000046221265436462100151250ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_TRACE_FRAME_H_ #define RR_TRACE_FRAME_H_ #include #include #include #include "Event.h" #include "ExtraRegisters.h" #include "PerfCounters.h" #include "Registers.h" #include "Ticks.h" class TraceReader; class TraceWriter; /** * A trace_frame is one "trace event" from a complete trace. During * recording, a trace_frame is recorded upon each significant event, * for example a context-switch or syscall. During replay, a * trace_frame represents a "next state" that needs to be transitioned * into, and the information recorded in the frame dictates the nature * of the transition. */ class TraceFrame { public: typedef uint32_t Time; TraceFrame(Time global_time, pid_t tid, const Event& event, Ticks tick_count) : global_time(global_time), tid_(tid), ev(event), ticks_(tick_count) {} TraceFrame() : global_time(0), tid_(0), ticks_(0) {} void set_exec_info(const Registers& regs, const PerfCounters::Extra* extra_perf_values, const ExtraRegisters* extra_regs); Time time() const { return global_time; } pid_t tid() const { return tid_; } const Event& event() const { return ev; } Ticks ticks() const { return ticks_; } const Registers& regs() const { return recorded_regs; } const ExtraRegisters& extra_regs() const { return recorded_extra_regs; } const PerfCounters::Extra& extra_perf_values() const { return extra_perf; } /** * Log a human-readable representation of this to |out| * (defaulting to stdout), including a newline character. * A human-friendly format is used. Does not emit a trailing '}' * (so the caller can add more fields to the record). */ void dump(FILE* out = nullptr) const; /** * Log a human-readable representation of this to |out| * (defaulting to stdout), including a newline character. An * easily machine-parseable format is dumped. */ void dump_raw(FILE* out = nullptr) const; private: friend class TraceReader; friend class TraceWriter; Time global_time; pid_t tid_; Event ev; Ticks ticks_; PerfCounters::Extra extra_perf; Registers recorded_regs; // Only used when has_exec_info, but variable length (and usually not // present) so we don't want to stuff it into exec_info ExtraRegisters recorded_extra_regs; }; #endif /* RR_TRACE_FRAME_H_ */ rr-4.1.0/src/TraceStream.cc000066400000000000000000000462421265436462100154700ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "Trace" #include "TraceStream.h" #include #include #include #include #include #include #include "log.h" #include "util.h" using namespace std; // // This represents the format and layout of recorded traces. This // version number doesn't track the rr version number, because changes // to the trace format will be rare. // // NB: if you *do* change the trace format for whatever reason, you // MUST increment this version number. Otherwise users' old traces // will become unreplayable and they won't know why. 
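//
// The version is stored as a single decimal number in the trace's
// "version" file: the TraceWriter constructor writes it and the
// TraceReader constructor (below) checks it, exiting with EX_DATAERR on
// a mismatch rather than risking a silent replay divergence.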
// #define TRACE_VERSION 40 struct SubstreamData { const char* name; size_t block_size; int threads; }; static const SubstreamData substreams[TraceStream::SUBSTREAM_COUNT] = { { "events", 1024 * 1024, 1 }, { "data_header", 1024 * 1024, 1 }, { "data", 8 * 1024 * 1024, 3 }, { "mmaps", 64 * 1024, 1 }, { "tasks", 64 * 1024, 1 } }; static const SubstreamData& substream(TraceStream::Substream s) { return substreams[s]; } static TraceStream::Substream operator++(TraceStream::Substream& s) { s = (TraceStream::Substream)(s + 1); return s; } static bool dir_exists(const string& dir) { struct stat dummy; return !dir.empty() && stat(dir.c_str(), &dummy) == 0; } static string default_rr_trace_dir() { static string cached_dir; if (!cached_dir.empty()) { return cached_dir; } string dot_dir; const char* home = getenv("HOME"); if (home) { dot_dir = string(home) + "/.rr"; } string xdg_dir; const char* xdg_data_home = getenv("XDG_DATA_HOME"); if (xdg_data_home) { xdg_dir = string(xdg_data_home) + "/rr"; } else if (home) { xdg_dir = string(home) + "/.local/share/rr"; } // If XDG dir does not exist but ~/.rr does, prefer ~/.rr for backwards // compatibility. if (dir_exists(xdg_dir)) { cached_dir = xdg_dir; } else if (dir_exists(dot_dir)) { cached_dir = dot_dir; } else if (!xdg_dir.empty()) { cached_dir = xdg_dir; } else { cached_dir = "/tmp/rr"; } return cached_dir; } static string trace_save_dir() { const char* output_dir = getenv("_RR_TRACE_DIR"); return output_dir ? output_dir : default_rr_trace_dir(); } static string latest_trace_symlink() { return trace_save_dir() + "/latest-trace"; } static void ensure_dir(const string& dir, mode_t mode) { string d = dir; while (!d.empty() && d[d.length() - 1] == '/') { d = d.substr(0, d.length() - 1); } struct stat st; if (0 > stat(d.c_str(), &st)) { if (errno != ENOENT) { FATAL() << "Error accessing trace directory `" << dir << "'"; } size_t last_slash = d.find_last_of('/'); if (last_slash == string::npos || last_slash == 0) { FATAL() << "Can't find trace directory `" << dir << "'"; } ensure_dir(d.substr(0, last_slash), mode); // Allow for a race condition where someone else creates the directory if (0 > mkdir(d.c_str(), mode) && errno != EEXIST) { FATAL() << "Can't create trace directory `" << dir << "'"; } if (0 > stat(d.c_str(), &st)) { FATAL() << "Can't stat trace directory `" << dir << "'"; } } if (!(S_IFDIR & st.st_mode)) { FATAL() << "`" << dir << "' exists but isn't a directory."; } if (access(d.c_str(), W_OK)) { FATAL() << "Can't write to `" << dir << "'."; } } /** * Create the default ~/.rr directory if it doesn't already exist. */ static void ensure_default_rr_trace_dir() { ensure_dir(default_rr_trace_dir(), S_IRWXU); } string TraceStream::path(Substream s) { return trace_dir + "/" + substream(s).name; } bool TraceWriter::good() const { for (auto& w : writers) { if (!w->good()) { return false; } } return true; } bool TraceReader::good() const { for (auto& r : readers) { if (!r->good()) { return false; } } return true; } struct BasicInfo { TraceFrame::Time global_time; pid_t tid_; EncodedEvent ev; Ticks ticks_; }; void TraceWriter::write_frame(const TraceFrame& frame) { auto& events = writer(EVENTS); BasicInfo basic_info = { frame.time(), frame.tid(), frame.event().encode(), frame.ticks() }; events << basic_info; if (!events.good()) { FATAL() << "Tried to save " << sizeof(basic_info) << " bytes to the trace, but failed"; } // TODO: only store exec info for non-async-sig events when // debugging assertions are enabled. 
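// A frame is serialized as BasicInfo, then (for HAS_EXEC_INFO frames)
// registers, extra perf values and the extra-register format/size/bytes,
// then (for signal events) the signal data. read_frame() below must
// deserialize fields in exactly this order.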
if (frame.event().has_exec_info() == HAS_EXEC_INFO) { events << frame.regs() << frame.extra_perf_values(); if (!events.good()) { FATAL() << "Tried to save registers to the trace, but failed"; } int extra_reg_bytes = frame.extra_regs().data_size(); char extra_reg_format = (char)frame.extra_regs().format(); events << extra_reg_format << extra_reg_bytes; if (!events.good()) { FATAL() << "Tried to save " << sizeof(extra_reg_bytes) + sizeof(extra_reg_format) << " bytes to the trace, but failed"; } if (extra_reg_bytes > 0) { events.write((const char*)frame.extra_regs().data_bytes(), extra_reg_bytes); if (!events.good()) { FATAL() << "Tried to save " << extra_reg_bytes << " bytes to the trace, but failed"; } } } if (frame.event().is_signal_event()) { events << frame.event().Signal().signal_data(); } tick_time(); } TraceFrame TraceReader::read_frame() { // Read the common event info first, to see if we also have // exec info to read. auto& events = reader(EVENTS); BasicInfo basic_info; events >> basic_info; TraceFrame frame(basic_info.global_time, basic_info.tid_, Event(basic_info.ev), basic_info.ticks_); if (frame.event().has_exec_info() == HAS_EXEC_INFO) { events >> frame.recorded_regs >> frame.extra_perf; int extra_reg_bytes; char extra_reg_format; events >> extra_reg_format >> extra_reg_bytes; if (extra_reg_bytes > 0) { vector data; data.resize(extra_reg_bytes); events.read((char*)data.data(), extra_reg_bytes); frame.recorded_extra_regs.set_arch(frame.event().arch()); frame.recorded_extra_regs.set_to_raw_data( (ExtraRegisters::Format)extra_reg_format, data); } else { assert(extra_reg_format == ExtraRegisters::NONE); frame.recorded_extra_regs = ExtraRegisters(frame.event().arch()); } } if (frame.event().is_signal_event()) { uint64_t signal_data; events >> signal_data; frame.ev.Signal().set_signal_data(signal_data); } tick_time(); assert(time() == frame.time()); return frame; } void TraceWriter::write_task_event(const TraceTaskEvent& event) { auto& tasks = writer(TASKS); tasks << event.type() << event.tid(); switch (event.type()) { case TraceTaskEvent::CLONE: tasks << event.parent_tid() << event.clone_flags(); break; case TraceTaskEvent::FORK: tasks << event.parent_tid(); break; case TraceTaskEvent::EXEC: tasks << event.file_name() << event.cmd_line() << event.fds_to_close(); break; case TraceTaskEvent::EXIT: break; case TraceTaskEvent::NONE: assert(0 && "Writing NONE TraceTaskEvent"); break; } } TraceTaskEvent TraceReader::read_task_event() { auto& tasks = reader(TASKS); TraceTaskEvent r; tasks >> r.type_ >> r.tid_; switch (r.type()) { case TraceTaskEvent::CLONE: tasks >> r.parent_tid_ >> r.clone_flags_; break; case TraceTaskEvent::FORK: tasks >> r.parent_tid_; break; case TraceTaskEvent::EXEC: tasks >> r.file_name_ >> r.cmd_line_ >> r.fds_to_close_; break; case TraceTaskEvent::EXIT: break; case TraceTaskEvent::NONE: // Should be EOF only assert(!tasks.good()); break; } return r; } string TraceWriter::try_hardlink_file(const string& file_name) { char count_str[20]; sprintf(count_str, "%d", mmap_count); size_t last_slash = file_name.rfind('/'); string basename = (last_slash != file_name.npos) ? file_name.substr(last_slash + 1) : file_name; string link_path = dir() + "/mmap_" + count_str + "_hardlink_" + basename; int ret = link(file_name.c_str(), link_path.c_str()); if (ret < 0) { // maybe tried to link across filesystems? 
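// (link(2) fails with EXDEV when the trace directory is on a different
// filesystem from |file_name|; falling back to the original path is the
// best we can do in that case.)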
return file_name; } return link_path; } TraceWriter::RecordInTrace TraceWriter::write_mapped_region( const KernelMapping& km, const struct stat& stat, MappingOrigin origin) { auto& mmaps = writer(MMAPS); TraceReader::MappedDataSource source; string backing_file_name; if (km.fsname().find("/SYSV") == 0) { source = TraceReader::SOURCE_TRACE; } else if (origin == SYSCALL_MAPPING && (km.inode() == 0 || km.fsname() == "/dev/zero (deleted)")) { source = TraceReader::SOURCE_ZERO; } else if (should_copy_mmap_region(km, stat) && files_assumed_immutable.find(make_pair( stat.st_dev, stat.st_ino)) == files_assumed_immutable.end()) { source = TraceReader::SOURCE_TRACE; } else { source = TraceReader::SOURCE_FILE; // Try hardlinking file into the trace directory. This will avoid // replay failures if the original file is deleted or replaced (but not // if it is overwritten in-place). If try_hardlink_file fails it // just returns the original file name. // A relative backing_file_name is relative to the trace directory. backing_file_name = try_hardlink_file(km.fsname()); files_assumed_immutable.insert(make_pair(stat.st_dev, stat.st_ino)); } mmaps << global_time << source << km.start() << km.end() << km.fsname() << km.device() << km.inode() << km.prot() << km.flags() << km.file_offset_bytes() << backing_file_name << (uint32_t)stat.st_mode << (uint32_t)stat.st_uid << (uint32_t)stat.st_gid << (int64_t)stat.st_size << (int64_t)stat.st_mtime; ++mmap_count; return source == TraceReader::SOURCE_TRACE ? RECORD_IN_TRACE : DONT_RECORD_IN_TRACE; } KernelMapping TraceReader::read_mapped_region(MappedData* data, bool* found) { if (found) { *found = false; } auto& mmaps = reader(MMAPS); if (mmaps.at_end()) { return KernelMapping(); } mmaps.save_state(); TraceFrame::Time time; mmaps >> time; mmaps.restore_state(); if (time != global_time) { return KernelMapping(); } string original_file_name; string backing_file_name; remote_ptr start, end; dev_t device; ino_t inode; int prot, flags; uint32_t uid, gid, mode; uint64_t file_offset_bytes; int64_t mtime, file_size; mmaps >> time >> data->source >> start >> end >> original_file_name >> device >> inode >> prot >> flags >> file_offset_bytes >> backing_file_name >> mode >> uid >> gid >> file_size >> mtime; assert(time == global_time); if (data->source == SOURCE_FILE) { if (backing_file_name[0] != '/') { backing_file_name = dir() + "/" + backing_file_name; } struct stat backing_stat; if (stat(backing_file_name.c_str(), &backing_stat)) { FATAL() << "Failed to stat " << backing_file_name << ": replay is impossible"; } if (backing_stat.st_ino != inode || backing_stat.st_mode != mode || backing_stat.st_uid != uid || backing_stat.st_gid != gid || backing_stat.st_size != file_size || backing_stat.st_mtime != mtime) { LOG(error) << "Metadata of " << original_file_name << " changed: replay divergence likely, but continuing anyway ..."; } } data->file_name = backing_file_name; data->file_data_offset_bytes = file_offset_bytes; data->file_size_bytes = file_size; if (found) { *found = true; } return KernelMapping(start, end, original_file_name, device, inode, prot, flags, file_offset_bytes); } static ostream& operator<<(ostream& out, const vector& vs) { out << vs.size() << endl; for (auto& v : vs) { out << v << '\0'; } return out; } static istream& operator>>(istream& in, vector& vs) { size_t len; in >> len; in.ignore(1); for (size_t i = 0; i < len; ++i) { char buf[PATH_MAX]; in.getline(buf, sizeof(buf), '\0'); vs.push_back(buf); } return in; } void TraceWriter::write_raw(const void* d, 
size_t len, remote_ptr<void> addr) { auto& data = writer(RAW_DATA); auto& data_header = writer(RAW_DATA_HEADER); data_header << global_time << addr.as_int() << len; data.write(d, len); } TraceReader::RawData TraceReader::read_raw_data() { auto& data = reader(RAW_DATA); auto& data_header = reader(RAW_DATA_HEADER); TraceFrame::Time time; RawData d; size_t num_bytes; data_header >> time >> d.addr >> num_bytes; assert(time == global_time); d.data.resize(num_bytes); data.read((char*)d.data.data(), num_bytes); return d; } bool TraceReader::read_raw_data_for_frame(const TraceFrame& frame, RawData& d) { auto& data_header = reader(RAW_DATA_HEADER); if (data_header.at_end()) { return false; } TraceFrame::Time time; data_header.save_state(); data_header >> time; data_header.restore_state(); assert(time >= frame.time()); if (time > frame.time()) { return false; } d = read_raw_data(); return true; } void TraceWriter::close() { for (auto& w : writers) { w->close(); } } static string make_trace_dir(const string& exe_path) { ensure_default_rr_trace_dir(); // Find a unique trace directory name. int nonce = 0; int ret; string dir; do { stringstream ss; ss << trace_save_dir() << "/" << basename(exe_path.c_str()) << "-" << nonce++; dir = ss.str(); ret = mkdir(dir.c_str(), S_IRWXU | S_IRWXG); } while (ret && EEXIST == errno); if (ret) { FATAL() << "Unable to create trace directory `" << dir << "'"; } return dir; } TraceWriter::TraceWriter(const vector<string>& argv, const vector<string>& envp, const string& cwd, int bind_to_cpu) : TraceStream(make_trace_dir(argv[0]), // Somewhat arbitrarily start the // global time from 1. 1), mmap_count(0) { this->argv = argv; this->envp = envp; this->cwd = cwd; this->bind_to_cpu = bind_to_cpu; for (Substream s = SUBSTREAM_FIRST; s < SUBSTREAM_COUNT; ++s) { writers[s] = unique_ptr<CompressedWriter>(new CompressedWriter( path(s), substream(s).block_size, substream(s).threads)); } string ver_path = version_path(); fstream version(ver_path.c_str(), fstream::out); if (!version.good()) { FATAL() << "Unable to create " << ver_path; } version << TRACE_VERSION << endl; string link_name = latest_trace_symlink(); // Try to update the symlink to |this|. We only attempt // to set the symlink once. If the link is re-created after // we |unlink()| it, then another rr process is racing with us // and it "won". The link is then valid and points at some // very-recent trace, so that's good enough.
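// Note that the unlink()/symlink() pair below is not atomic, so symlink()
// can still fail with EEXIST if another rr process wins the race; that
// errno is deliberately tolerated.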
unlink(link_name.c_str()); int ret = symlink(trace_dir.c_str(), link_name.c_str()); if (ret < 0 && errno != EEXIST) { FATAL() << "Failed to update symlink `" << link_name << "' to `" << trace_dir << "'."; } if (!probably_not_interactive(STDOUT_FILENO)) { printf("rr: Saving the execution of `%s' to trace directory `%s'.\n", argv[0].c_str(), trace_dir.c_str()); } ofstream out(args_env_path()); out << cwd << '\0'; out << argv; out << envp; out << bind_to_cpu; assert(out.good()); } TraceFrame TraceReader::peek_frame() { auto& events = reader(EVENTS); events.save_state(); auto saved_time = global_time; TraceFrame frame; if (!at_end()) { frame = read_frame(); } events.restore_state(); global_time = saved_time; return frame; } TraceFrame TraceReader::peek_to(pid_t pid, EventType type, SyscallState state) { auto& events = reader(EVENTS); TraceFrame frame; events.save_state(); auto saved_time = global_time; while (good() && !at_end()) { frame = read_frame(); if (frame.tid() == pid && frame.event().type() == type && (!frame.event().is_syscall_event() || frame.event().Syscall().state == state)) { events.restore_state(); global_time = saved_time; return frame; } } FATAL() << "Unable to find requested frame in stream"; // Unreachable return frame; } void TraceReader::rewind() { for (Substream s = SUBSTREAM_FIRST; s < SUBSTREAM_COUNT; ++s) { reader(s).rewind(); } global_time = 0; assert(good()); } TraceReader::TraceReader(const string& dir) : TraceStream(dir.empty() ? latest_trace_symlink() : dir, // Initialize the global time at 0, so // that when we tick it when reading // the first trace, it matches the // initial global time at recording, 1. 0) { for (Substream s = SUBSTREAM_FIRST; s < SUBSTREAM_COUNT; ++s) { readers[s] = unique_ptr(new CompressedReader(path(s))); } string path = version_path(); fstream vfile(path.c_str(), fstream::in); if (!vfile.good()) { fprintf( stderr, "\n" "rr: error: Version file for recorded trace `%s' not found. Did you " "record\n" " `%s' with an older version of rr? If so, you'll need to " "replay\n" " `%s' with that older version. Otherwise, your trace is\n" " likely corrupted.\n" "\n", path.c_str(), path.c_str(), path.c_str()); exit(EX_DATAERR); } int version = 0; vfile >> version; if (vfile.fail() || TRACE_VERSION != version) { fprintf(stderr, "\n" "rr: error: Recorded trace `%s' has an incompatible " "version %d; expected\n" " %d. Did you record `%s' with an older version " "of rr? If so,\n" " you'll need to replay `%s' with that older " "version. Otherwise,\n" " your trace is likely corrupted.\n" "\n", path.c_str(), version, TRACE_VERSION, path.c_str(), path.c_str()); exit(EX_DATAERR); } ifstream in(args_env_path()); assert(in.good()); char buf[PATH_MAX]; in.getline(buf, sizeof(buf), '\0'); cwd = buf; in >> argv; in >> envp; in >> bind_to_cpu; } /** * Create a copy of this stream that has exactly the same * state as 'other', but for which mutations of this * clone won't affect the state of 'other' (and vice versa). 
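 * (Implemented by duplicating each substream's CompressedReader at its
 * current position, along with the global time and the cached
 * argv/envp/cwd/bind_to_cpu.)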
*/ TraceReader::TraceReader(const TraceReader& other) : TraceStream(other.dir(), other.time()) { for (Substream s = SUBSTREAM_FIRST; s < SUBSTREAM_COUNT; ++s) { readers[s] = unique_ptr(new CompressedReader(other.reader(s))); } argv = other.argv; envp = other.envp; cwd = other.cwd; bind_to_cpu = other.bind_to_cpu; } uint64_t TraceReader::uncompressed_bytes() const { uint64_t total = 0; for (Substream s = SUBSTREAM_FIRST; s < SUBSTREAM_COUNT; ++s) { total += reader(s).uncompressed_bytes(); } return total; } uint64_t TraceReader::compressed_bytes() const { uint64_t total = 0; for (Substream s = SUBSTREAM_FIRST; s < SUBSTREAM_COUNT; ++s) { total += reader(s).compressed_bytes(); } return total; } rr-4.1.0/src/TraceStream.h000066400000000000000000000210261265436462100153230ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_TRACE_H_ #define RR_TRACE_H_ #include #include #include #include #include #include "CompressedReader.h" #include "CompressedWriter.h" #include "Event.h" #include "remote_ptr.h" #include "TraceFrame.h" #include "TraceTaskEvent.h" class KernelMapping; /** * TraceStream stores all the data common to both recording and * replay. TraceWriter deals with recording-specific logic, and * TraceReader handles replay-specific details. * * These classes are all in the same .h/.cc file to keep trace reading and * writing code together for easier coordination. */ class TraceStream { protected: typedef std::string string; public: /** * Update |substreams| and TRACE_VERSION when you update this list. */ enum Substream { SUBSTREAM_FIRST, // Substream that stores events (trace frames). EVENTS = SUBSTREAM_FIRST, // Substreams that store raw data saved from tracees (|RAW_DATA|), and // metadata about the stored data (|RAW_DATA_HEADER|). RAW_DATA_HEADER, RAW_DATA, // Substream that stores metadata about files mmap'd during // recording. MMAPS, // Substream that stores task creation and exec events TASKS, SUBSTREAM_COUNT }; /** Return the directory storing this trace's files. */ const string& dir() const { return trace_dir; } const string& initial_exe() const { return argv[0]; } const std::vector& initial_argv() const { return argv; } const std::vector& initial_envp() const { return envp; } const string& initial_cwd() const { return cwd; } int bound_to_cpu() const { return bind_to_cpu; } /** * Return the current "global time" (event count) for this * trace. */ TraceFrame::Time time() const { return global_time; } protected: TraceStream(const string& trace_dir, TraceFrame::Time initial_time) : trace_dir(trace_dir), global_time(initial_time) {} /** * Return the path of the file for the given substream. */ string path(Substream s); /** * Return the path of the "args_env" file, into which the * initial tracee argv and envp are recorded. */ string args_env_path() const { return trace_dir + "/args_env"; } /** * Return the path of "version" file, into which the current * trace format version of rr is stored upon creation of the * trace. */ string version_path() const { return trace_dir + "/version"; } /** * Increment the global time and return the incremented value. */ void tick_time() { ++global_time; } // Directory into which we're saving the trace files. string trace_dir; // The initial argv and envp for a tracee. std::vector argv; std::vector envp; // Current working directory at start of record/replay. 
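// (Persisted in the trace's "args_env" file together with argv, envp and
// bind_to_cpu; see args_env_path().)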
string cwd; // CPU core# that the tracees are bound to int bind_to_cpu; // Arbitrary notion of trace time, ticked on the recording of // each event (trace frame). TraceFrame::Time global_time; }; class TraceWriter : public TraceStream { public: /** * Write trace frame to the trace. * * Recording a trace frame has the side effect of ticking * the global time. */ void write_frame(const TraceFrame& frame); enum RecordInTrace { DONT_RECORD_IN_TRACE, RECORD_IN_TRACE }; enum MappingOrigin { SYSCALL_MAPPING, EXEC_MAPPING, PATCH_MAPPING }; /** * Write mapped-region record to the trace. * If this returns RECORD_IN_TRACE, then the data for the map should be * recorded in the trace raw-data. */ RecordInTrace write_mapped_region(const KernelMapping& map, const struct stat& stat, MappingOrigin origin = SYSCALL_MAPPING); /** * Write a raw-data record to the trace. * 'addr' is the address in the tracee where the data came from/will be * restored to. */ void write_raw(const void* data, size_t len, remote_ptr addr); /** * Write a task event (clone or exec record) to the trace. */ void write_task_event(const TraceTaskEvent& event); /** * Return true iff all trace files are "good". */ bool good() const; /** Call close() on all the relevant trace files. * Normally this will be called by the destructor. It's helpful to * call this before a crash that won't call the destructor, to ensure * buffered data is flushed. */ void close(); /** * Create a trace that will record the initial exe * image |argv[0]| with initial args |argv|, initial environment |envp|, * current working directory |cwd| and bound to cpu |bind_to_cpu|. This * data is recored in the trace. * The trace name is determined by the global rr args and environment. */ TraceWriter(const std::vector& argv, const std::vector& envp, const string& cwd, int bind_to_cpu); private: std::string try_hardlink_file(const std::string& file_name); CompressedWriter& writer(Substream s) { return *writers[s]; } const CompressedWriter& writer(Substream s) const { return *writers[s]; } std::unique_ptr writers[SUBSTREAM_COUNT]; /** * Files that have already been mapped without being copied to the trace, * i.e. that we have already assumed to be immutable. */ std::set > files_assumed_immutable; uint32_t mmap_count; }; class TraceReader : public TraceStream { public: /** * A parcel of recorded tracee data. |data| contains the data read * from |addr| in the tracee. */ struct RawData { std::vector data; remote_ptr addr; }; /** * Read relevant data from the trace. * * NB: reading a trace frame has the side effect of ticking * the global time to match the time recorded in the trace * frame. */ TraceFrame read_frame(); enum MappedDataSource { SOURCE_TRACE, SOURCE_FILE, SOURCE_ZERO }; /** * Where to obtain data for the mapped region. */ struct MappedData { MappedDataSource source; /** Name of file to map the data from. */ string file_name; /** Data offset within the file. */ uint64_t file_data_offset_bytes; /** Original size of mapped file. */ uint64_t file_size_bytes; }; /** * Read the next mapped region descriptor and return it. * Also returns where to get the mapped data in 'data'. * If |found| is non-null, set *found to indicate whether a descriptor * was found for the current event. */ KernelMapping read_mapped_region(MappedData* data, bool* found = nullptr); /** * Peek at the next mapping. Returns an empty region if there isn't one for * the current event. */ KernelMapping peek_mapped_region(); /** * Read a task event (clone or exec record) from the trace. 
* Returns a record of type NONE at the end of the trace. */ TraceTaskEvent read_task_event(); /** * Read the next raw data record and return it. */ RawData read_raw_data(); /** * Reads the next raw data record for 'frame' from the current point in * the trace. If there are no more raw data records for 'frame', returns * false. */ bool read_raw_data_for_frame(const TraceFrame& frame, RawData& d); /** * Return true iff all trace files are "good". * for more details. */ bool good() const; /** * Return true if we're at the end of the trace file. */ bool at_end() const { return reader(EVENTS).at_end(); } /** * Return the next trace frame, without mutating any stream * state. */ TraceFrame peek_frame(); /** * Peek ahead in the stream to find the next trace frame that * matches the requested parameters. Returns the frame if one * was found, and issues a fatal error if not. */ TraceFrame peek_to(pid_t pid, EventType type, SyscallState state); /** * Restore the state of this to what it was just after * |open()|. */ void rewind(); uint64_t uncompressed_bytes() const; uint64_t compressed_bytes() const; /** * Open the trace in 'dir'. When 'dir' is the empty string, open the * latest trace. */ TraceReader(const string& dir); /** * Create a copy of this stream that has exactly the same * state as 'other', but for which mutations of this * clone won't affect the state of 'other' (and vice versa). */ TraceReader(const TraceReader& other); private: CompressedReader& reader(Substream s) { return *readers[s]; } const CompressedReader& reader(Substream s) const { return *readers[s]; } std::unique_ptr readers[SUBSTREAM_COUNT]; }; #endif /* RR_TRACE_H_ */ rr-4.1.0/src/TraceTaskEvent.h000066400000000000000000000043341265436462100157770ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_TRACE_TASK_EVENT_H_ #define RR_TRACE_TASK_EVENT_H_ #include #include #include #include "Event.h" #include "ExtraRegisters.h" #include "PerfCounters.h" #include "TraceFrame.h" class TraceReader; class TraceWriter; class TraceTaskEvent { public: TraceTaskEvent(pid_t tid, pid_t parent_tid) : type_(FORK), tid_(tid), parent_tid_(parent_tid) {} TraceTaskEvent(pid_t tid, pid_t parent_tid, uint32_t clone_flags) : type_(CLONE), tid_(tid), parent_tid_(parent_tid), clone_flags_(clone_flags) {} TraceTaskEvent(pid_t tid, const std::string& file_name, const std::vector cmd_line) : type_(EXEC), tid_(tid), file_name_(file_name), cmd_line_(cmd_line) {} TraceTaskEvent(pid_t tid) : type_(EXIT), tid_(tid) {} TraceTaskEvent() : type_(NONE) {} enum Type { NONE, CLONE, // created by clone(2) syscall FORK, // created by fork(2) syscall EXEC, EXIT }; Type type() const { return type_; } pid_t tid() const { return tid_; } pid_t parent_tid() const { assert(type() == CLONE || type() == FORK); return parent_tid_; } uintptr_t clone_flags() const { assert(type() == CLONE); return clone_flags_; } const std::string& file_name() const { assert(type() == EXEC); return file_name_; } const std::vector& cmd_line() const { assert(type() == EXEC); return cmd_line_; } const std::vector& fds_to_close() const { assert(type() == EXEC); return fds_to_close_; } bool is_fork() const { return type() == FORK || (type() == CLONE && !(clone_flags() & CLONE_VM)); } void set_fds_to_close(const std::vector fds) { assert(type() == EXEC); fds_to_close_ = fds; } private: friend class TraceReader; friend class TraceWriter; Type type_; pid_t tid_; pid_t parent_tid_; // CLONE only uintptr_t clone_flags_; // CLONE only 
std::string file_name_; // EXEC only std::vector cmd_line_; // EXEC only std::vector fds_to_close_; // EXEC only }; #endif /* RR_TRACE_TASK_EVENT_H_ */ rr-4.1.0/src/assembly_templates.py000066400000000000000000000214611265436462100172120ustar00rootroot00000000000000import StringIO import sys class RawBytes(object): """A sequence of literal bytes to appear in an assembly language template.""" def __init__(self, *bytes): self.bytes = bytes def __len__(self): return len(self.bytes) class Field(object): """A variable field of bytes.""" def __init__(self, name, byte_length): self.name = name self.byte_length = byte_length def __len__(self): return self.byte_length def c_type(self): types = { 8: 'uint64_t', 4: 'uint32_t', 2: 'uint16_t', 1: 'uint8_t' } return types[self.byte_length] class AssemblyTemplate(object): """A sequence of RawBytes and Field objects, which can be used to verify that a given sequence of assembly instructions matches the RawBytes while pulling out the Field values for inspection. Or for creating custom assembly stubs, filling out Fields with runtime-determined values.""" def __init__(self, *chunks): # Merge consecutive RawBytes elements together for efficiency of # matching and for simplicity of template expansion. merged_chunks = [] current_raw_bytes = [] for c in chunks: if isinstance(c, Field): # Push any raw bytes before this. if current_raw_bytes: merged_chunks.append(RawBytes(*current_raw_bytes)) current_raw_bytes = [] merged_chunks.append(c) else: current_raw_bytes.extend(c.bytes) # Merge in trailing raw bytes. if current_raw_bytes: merged_chunks.append(RawBytes(*current_raw_bytes)) self.chunks = merged_chunks def fields(self): return [c for c in self.chunks if isinstance(c, Field)] def bytes(self): bytes = [] for c in self.chunks: if isinstance(c, Field): bytes.extend([0] * len(c)) else: bytes.extend(c.bytes) return bytes templates = { 'X86SysenterVsyscallImplementation': AssemblyTemplate( RawBytes(0x51), # push %ecx RawBytes(0x52), # push %edx RawBytes(0x55), # push %ebp RawBytes(0x89, 0xe5), # mov %esp,%ebp RawBytes(0x0f, 0x34), # sysenter ), 'X86SysenterVsyscallUseInt80': AssemblyTemplate( RawBytes(0xcd, 0x80), # int $0x80 RawBytes(0xc3), # ret ), 'X86SysenterVsyscallSyscallHook': AssemblyTemplate( RawBytes(0xe9), # jmp $syscall_hook_trampoline Field('syscall_hook_trampoline', 4), ), 'X86VsyscallMonkeypatch': AssemblyTemplate( RawBytes(0x53), # push %ebx RawBytes(0xb8), # mov $syscall_number,%eax Field('syscall_number', 4), # __vdso functions use the C calling convention, so # we have to set up the syscall parameters here. # No x86-32 __vdso functions take more than two parameters. RawBytes(0x8b, 0x5c, 0x24, 0x08), # mov 0x8(%esp),%ebx RawBytes(0x8b, 0x4c, 0x24, 0x0c), # mov 0xc(%esp),%ecx RawBytes(0xcd, 0x80), # int $0x80 # pad with NOPs to make room to dynamically patch the syscall # with a call to the preload library, once syscall buffering # has been initialized. RawBytes(0x90), # nop RawBytes(0x90), # nop RawBytes(0x90), # nop RawBytes(0x5b), # pop %ebx RawBytes(0xc3), # ret ), 'X86SyscallStubExtendedJump': AssemblyTemplate( RawBytes(0xe9), # jmp Field('relative_jump_target', 4), ), 'X86SyscallStubMonkeypatch': AssemblyTemplate( # This code must match the stubs in syscall_hook.S. # We must adjust the stack pointer without modifying flags, # at least on the return path. 
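    # (The ModRM/SIB displacement bytes 0x00,0xff,0xff,0xff below encode
    #  -256(%esp): the stub scratches memory 256 bytes below the stack
    #  pointer before switching to it, mirroring the x86-64 variant later
    #  in this table.)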
RawBytes(0xc7, 0x84, 0x24, 0x00, 0xff, 0xff, 0xff), # movq $fake_return_addr,-256(%esp) Field('fake_return_addr', 4), RawBytes(0x89, 0xa4, 0x24, 0x04, 0xff, 0xff, 0xff), # mov %esp,-252(%esp) RawBytes(0x8d, 0xa4, 0x24, 0x00, 0xff, 0xff, 0xff), # lea -256(%esp),%esp RawBytes(0xe8), # call $trampoline_relative_addr Field('trampoline_relative_addr', 4), RawBytes(0x8d, 0xa4, 0x24, 0x00, 0x01, 0x00, 0x00), # lea 256(%esp),%esp RawBytes(0xff, 0xa4, 0x24, 0x00, 0xff, 0xff, 0xff), # jmp -256(%esp) ), 'X64JumpMonkeypatch': AssemblyTemplate( RawBytes(0xe9), # jmp $relative_addr Field('relative_addr', 4), ), 'X64VsyscallMonkeypatch': AssemblyTemplate( RawBytes(0xb8), # mov $syscall_number,%eax Field('syscall_number', 4), RawBytes(0x0f, 0x05), # syscall # pad with NOPs to make room to dynamically patch the syscall # with a call to the preload library, once syscall buffering # has been initialized. RawBytes(0x90), # nop RawBytes(0x90), # nop RawBytes(0x90), # nop RawBytes(0xc3), # ret ), 'X64SyscallStubExtendedJump': AssemblyTemplate( RawBytes(0xff, 0x25, 0x00, 0x00, 0x00, 0x00), # jmp *0(%rip) Field('jump_target', 8), ), 'X64SyscallStubMonkeypatch': AssemblyTemplate( # This code must match the stubs in syscall_hook.S. # We must adjust the stack pointer without modifying flags, # at least on the return path. RawBytes(0xc7, 0x84, 0x24, 0x00, 0xff, 0xff, 0xff), # movl $return_addr_lo,-256(%rsp) Field('return_addr_lo', 4), RawBytes(0xc7, 0x84, 0x24, 0x04, 0xff, 0xff, 0xff), # movl $return_addr_hi,-252(%rsp) Field('return_addr_hi', 4), RawBytes(0x48, 0x89, 0xa4, 0x24, 0x08, 0xff, 0xff, 0xff), # mov %rsp,-248(%rsp) RawBytes(0x48, 0x8d, 0xa4, 0x24, 0x00, 0xff, 0xff, 0xff), # lea -256(%rsp),%rsp RawBytes(0xe8), # call $trampoline_relative_addr Field('trampoline_relative_addr', 4), RawBytes(0x48, 0x8d, 0xa4, 0x24, 0x00, 0x01, 0x00, 0x00), # lea 256(%rsp),%rsp RawBytes(0xff, 0xa4, 0x24, 0x00, 0xff, 0xff, 0xff), # jmp -256(%rsp) ), } def byte_array_name(name): return '%s_bytes' % name def generate_match_method(byte_array, template): s = StringIO.StringIO() fields = template.fields() field_types = [f.c_type() for f in fields] field_names = [f.name for f in fields] args = ', ' + ', '.join("%s* %s" % (t, n) for t, n in zip(field_types, field_names)) \ if fields else '' s.write('static bool match(const uint8_t* buffer %s) {\n' % (args,)) offset = 0 for chunk in template.chunks: if isinstance(chunk, Field): field_name = chunk.name s.write(' memcpy(%s, &buffer[%d], sizeof(*%s));\n' % (field_name, offset, field_name)) else: s.write(' if (memcmp(&buffer[%d], &%s[%d], %d) != 0) { return false; }\n' % (offset, byte_array, offset, len(chunk))) offset += len(chunk) s.write(' return true;\n') s.write(' }') return s.getvalue() def generate_substitute_method(byte_array, template): s = StringIO.StringIO() fields = template.fields() field_types = [f.c_type() for f in fields] field_names = [f.name for f in fields] args = ', ' + ', '.join("%s %s" % (t, n) for t, n in zip(field_types, field_names)) \ if fields else '' s.write('static void substitute(uint8_t* buffer %s) {\n' % (args,)) offset = 0 for chunk in template.chunks: if isinstance(chunk, Field): field_name = chunk.name s.write(' memcpy(&buffer[%d], &%s, sizeof(%s));\n' % (offset, field_name, field_name)) else: s.write(' memcpy(&buffer[%d], &%s[%d], %d);\n' % (offset, byte_array, offset, len(chunk))) offset += len(chunk) s.write(' }') return s.getvalue() def generate_size_member(byte_array): s = StringIO.StringIO() s.write('static const size_t size = sizeof(%s);' % 
byte_array) return s.getvalue() def generate(f): # Raw bytes. for name, template in templates.iteritems(): bytes = template.bytes() f.write('static const uint8_t %s[] = { %s };\n' % (byte_array_name(name), ', '.join(['0x%x' % b for b in bytes]))) f.write('\n') # Objects representing assembly templates. for name, template in templates.iteritems(): byte_array = byte_array_name(name) f.write("""class %(class_name)s { public: %(match_method)s %(substitute_method)s %(size_member)s }; """ % { 'class_name': name, 'match_method': generate_match_method(byte_array, template), 'substitute_method': generate_substitute_method(byte_array, template), 'size_member': generate_size_member(byte_array), }) f.write('\n\n') rr-4.1.0/src/drm.h000066400000000000000000000105131265436462100136720ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_DRM_H #define RR_DRM_H 1 #include #include #include // TODO this should all move to kernel_abi.h #ifdef __cplusplus extern "C" { #endif /** * The *drm.h headers don't play well when included by external code. * They don't compile without hacks when included by either C or C++ * code, and additionally have version-specific quirks. So we get * ourselves off that treadmill by creating a clean "shadow header". * * TODO: add checks that these shadow definitions are consistent with * the system headers'. */ /*---------------------------------------------------------------------------*/ typedef unsigned int drm_magic_t; struct drm_version { int version_major; int version_minor; int version_patchlevel; size_t name_len; char* name; size_t date_len; char* date; size_t desc_len; char* desc; }; struct drm_auth { drm_magic_t magic; }; struct drm_gem_open { __u32 name; __u32 handle; __u64 size; }; #define DRM_IOCTL_BASE 'd' #define DRM_IO(nr) _IO(DRM_IOCTL_BASE, nr) #define DRM_IOR(nr, type) _IOR(DRM_IOCTL_BASE, nr, type) #define DRM_IOW(nr, type) _IOW(DRM_IOCTL_BASE, nr, type) #define DRM_IOWR(nr, type) _IOWR(DRM_IOCTL_BASE, nr, type) #define DRM_COMMAND_BASE 0x40 #define DRM_IOCTL_VERSION DRM_IOWR(0x00, struct drm_version) #define DRM_IOCTL_GET_MAGIC DRM_IOR(0x02, struct drm_auth) #define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) /*---------------------------------------------------------------------------*/ struct drm_i915_gem_pwrite { __u32 handle; __u32 pad; __u64 offset; __u64 size; __u64 data_ptr; }; struct drm_i915_gem_mmap { __u32 handle; __u32 pad; __u64 offset; __u64 size; __u64 addr_ptr; }; #define DRM_I915_GEM_PWRITE 0x1d #define DRM_I915_GEM_MMAP 0x1e #define DRM_IOCTL_I915_GEM_PWRITE \ DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) #define DRM_IOCTL_I915_GEM_MMAP \ DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) /*---------------------------------------------------------------------------*/ struct drm_nouveau_gem_info { uint32_t handle; uint32_t domain; uint64_t size; uint64_t offset; uint64_t map_handle; uint32_t tile_mode; uint32_t tile_flags; }; struct drm_nouveau_gem_new { struct drm_nouveau_gem_info info; uint32_t channel_hint; uint32_t align; }; struct drm_nouveau_gem_pushbuf { uint32_t channel; uint32_t nr_buffers; uint64_t buffers; uint32_t nr_relocs; uint32_t nr_push; uint64_t relocs; uint64_t push; uint32_t suffix0; uint32_t suffix1; uint64_t vram_available; uint64_t gart_available; }; #define DRM_NOUVEAU_GEM_NEW 0x40 #define DRM_NOUVEAU_GEM_PUSHBUF 0x41 #define DRM_IOCTL_NOUVEAU_GEM_NEW \ DRM_IOWR(DRM_COMMAND_BASE + 
DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new) #define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF \ DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, \ struct drm_nouveau_gem_pushbuf) /*---------------------------------------------------------------------------*/ struct drm_radeon_info { uint32_t request; uint32_t pad; uint64_t value; }; struct drm_radeon_gem_create { uint64_t size; uint64_t alignment; uint32_t handle; uint32_t initial_domain; uint32_t flags; }; struct drm_radeon_gem_get_tiling { uint32_t handle; uint32_t tiling_flags; uint32_t pitch; }; #define DRM_RADEON_INFO 0x27 #define DRM_RADEON_GEM_CREATE 0x1d #define DRM_RADEON_GEM_GET_TILING 0x29 #define DRM_IOCTL_RADEON_INFO \ DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info) #define DRM_IOCTL_RADEON_GEM_CREATE \ DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_CREATE, \ struct drm_radeon_gem_create) #define DRM_IOCTL_RADEON_GEM_GET_TILING \ DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, \ struct drm_radeon_gem_get_tiling) #ifdef __cplusplus } /* extern "C" */ #endif #endif /* RR_DRM_H */ rr-4.1.0/src/exec_stub.c000066400000000000000000000001451265436462100150640ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ void _start(void) {} rr-4.1.0/src/fast_forward.cc000066400000000000000000000357041265436462100157400ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "FastForward" #include "fast_forward.h" #include "log.h" using namespace rr; using namespace std; struct InstructionBuf { SupportedArch arch; uint8_t code_buf[32]; int code_buf_len; }; static InstructionBuf read_instruction(Task* t, remote_code_ptr ip) { InstructionBuf result; result.arch = t->arch(); result.code_buf_len = (int)t->read_bytes_fallible( ip.to_data_ptr(), sizeof(result.code_buf), result.code_buf); return result; } struct DecodedInstruction { int operand_size; int length; bool modifies_flags; bool uses_si; }; /** * This can be conservative: for weird prefix combinations that make valid * string instructions, but aren't ever used in practice, we can return false. */ static bool decode_x86_string_instruction(const InstructionBuf& code, DecodedInstruction* decoded) { bool found_operand_prefix = false; bool found_REP_prefix = false; bool found_REXW_prefix = false; decoded->modifies_flags = false; decoded->uses_si = false; int i; bool done = false; for (i = 0; i < code.code_buf_len; ++i) { switch (code.code_buf[i]) { case 0x66: found_operand_prefix = true; break; case 0x48: if (code.arch == x86_64) { found_REXW_prefix = true; break; } return false; case 0xF2: case 0xF3: found_REP_prefix = true; break; case 0xA4: // MOVSB case 0xA5: // MOVSW decoded->uses_si = true; done = true; break; case 0xAA: // STOSB case 0xAB: // STOSW case 0xAC: // LODSB case 0xAD: // LODSW done = true; break; case 0xA6: // CMPSB case 0xA7: // CMPSW decoded->modifies_flags = true; decoded->uses_si = true; done = true; break; case 0xAE: // SCASB case 0xAF: // SCASW decoded->modifies_flags = true; done = true; break; default: return false; } if (done) { break; } } if (!found_REP_prefix) { return false; } decoded->length = i + 1; if (code.code_buf[i] & 1) { decoded->operand_size = found_REXW_prefix ? 8 : (found_operand_prefix ? 
2 : 4);
  } else {
    decoded->operand_size = 1;
  }
  return true;
}

static bool mem_intersect(remote_ptr<void> a1, int s1, remote_ptr<void> a2,
                          int s2) {
  assert(a1 + s1 > a1);
  assert(a2 + s2 > a2);
  return max(a1, a2) < min(a1 + s1, a2 + s2);
}

static void bound_iterations_for_watchpoint(Task* t, remote_ptr<void> reg,
                                            const DecodedInstruction& decoded,
                                            const WatchConfig& watch,
                                            uintptr_t* iterations) {
  // Compute how many iterations it will take before we hit the watchpoint.
  // 0 means the first iteration will hit the watchpoint.
  int size = decoded.operand_size;
  int direction = t->regs().df_flag() ? -1 : 1;

  if (mem_intersect(reg, size, watch.addr, watch.num_bytes)) {
    *iterations = 0;
    return;
  }

  // Number of iterations we can perform without triggering the watchpoint
  uintptr_t steps;
  if (direction > 0) {
    if (watch.addr < reg) {
      // We're assuming wraparound can't happen!
      return;
    }
    // We'll hit the first byte of the watchpoint moving forward.
    steps = (watch.addr - reg) / size;
  } else {
    if (watch.addr > reg) {
      // We're assuming wraparound can't happen!
      return;
    }
    // We'll hit the last byte of the watchpoint moving backward.
    steps = (reg - (watch.addr + watch.num_bytes)) / size + 1;
  }

  *iterations = min(*iterations, steps);
}

static bool is_x86ish(Task* t) {
  return t->arch() == x86 || t->arch() == x86_64;
}

bool fast_forward_through_instruction(Task* t, ResumeRequest how,
                                      const vector<const Registers*>& states) {
  assert(how == RESUME_SINGLESTEP || how == RESUME_SYSEMU_SINGLESTEP);

  remote_code_ptr ip = t->ip();
  t->resume_execution(how, RESUME_WAIT, RESUME_UNLIMITED_TICKS);
  if (t->pending_sig() != SIGTRAP) {
    // we might have stepped into a system call...
    return false;
  }

  if (t->ip() != ip) {
    return false;
  }
  if (t->vm()->get_breakpoint_type_at_addr(ip) != TRAP_NONE) {
    // breakpoint must have fired
    return false;
  }
  if (t->vm()->notify_watchpoint_fired(t->debug_status())) {
    // watchpoint fired
    return false;
  }
  for (auto& state : states) {
    if (state->matches(t->regs())) {
      return false;
    }
  }

  if (!is_x86ish(t)) {
    return false;
  }

  InstructionBuf instruction_buf = read_instruction(t, ip);
  DecodedInstruction decoded;
  if (!decode_x86_string_instruction(instruction_buf, &decoded)) {
    return false;
  }
  remote_code_ptr limit_ip = ip + decoded.length;

  // At this point we can be sure the instruction didn't trigger a syscall,
  // so we no longer care about the value of |how|.

  Registers extra_state_to_avoid;
  vector<const Registers*> states_copy;
  auto using_states = &states;
  bool did_execute = false;

  while (true) {
    // This string instruction should execute until CX reaches 0 and
    // we move to the next instruction, or we hit one of the states in
    // |states|, or the ZF flag changes so that the REP stops, or we hit
    // a watchpoint. (We can't hit a breakpoint during the loop since we
    // already verified there isn't one set here.)
    // We'll compute an upper bound on the number of string instruction
    // iterations to execute, and set a watchpoint on the memory location
    // accessed through DI in the iteration we want to stop at. We'll also
    // set a breakpoint after the string instruction to catch cases where it
    // ends due to a ZF change.
    // Keep in mind that it's possible that states in |states| might
    // belong to multiple independent loops of this string instruction, with
    // registers reset in between the loops.
    uintptr_t cur_cx = t->regs().cx();
    if (cur_cx == 0) {
      // This instruction will be skipped entirely.
      return did_execute;
    }
    // Don't execute the last iteration of the string instruction.
That // simplifies code below that tries to emulate the register effects // of singlestepping to predict if the next singlestep would result in a // mark_vector state. uintptr_t iterations = cur_cx - 1; // Bound |iterations| to ensure we stop before reaching any |states|. for (auto& state : *using_states) { if (state->ip() == ip) { uintptr_t dest_cx = state->cx(); if (dest_cx == 0) { // This state represents entering the string instruction with CX==0, // so we can't reach this state in the current loop. continue; } if (dest_cx >= cur_cx) { // This can't be reached in the current loop. continue; } iterations = min(iterations, cur_cx - dest_cx - 1); } else if (state->ip() == limit_ip) { uintptr_t dest_cx = state->cx(); if (dest_cx >= cur_cx) { // This can't be reached in the current loop. continue; } iterations = min(iterations, cur_cx - dest_cx - 1); } } // To stop before the ZF changes and we exit the loop, we don't bound // the iterations here. Instead we run the loop, observe the ZF change, // and then rerun the loop with the loop-exit state added to the |states| // list. See below. // A code watchpoint would already be hit if we're going to hit it. // Check for data watchpoints that we might hit when reading/writing // memory. // Make conservative assumptions about the watchpoint type. Applying // unnecessary watchpoints here will only result in a few more singlesteps. // We do have to ignore SI if the instruction doesn't use it; otherwise // a watchpoint which happens to match SI will appear to be hit on every // iteration of the string instruction, which would be devastating. for (auto& watch : t->vm()->all_watchpoints()) { if (decoded.uses_si) { bound_iterations_for_watchpoint(t, t->regs().si(), decoded, watch, &iterations); } bound_iterations_for_watchpoint(t, t->regs().di(), decoded, watch, &iterations); } if (iterations == 0) { return did_execute; } LOG(debug) << "x86-string fast-forward: " << iterations << " iterations required (ip==" << t->ip() << ")"; Registers r = t->regs(); int direction = t->regs().df_flag() ? -1 : 1; // Figure out the address to set a watchpoint at. This address must // be accessed at or before the last iteration we want to perform. // We have to account for a CPU quirk: Intel CPUs may coalesce iterations // to write up to 64 bytes at a time (observed for "rep stosb" on Ivy // Bridge). Assume 128 bytes to be safe. static const unsigned BYTES_COALESCED = 128; uintptr_t watch_offset = decoded.operand_size * (iterations - 1); if (watch_offset > BYTES_COALESCED) { watch_offset -= BYTES_COALESCED; t->vm()->save_watchpoints(); t->vm()->remove_all_watchpoints(); remote_ptr watch_di = t->regs().di() + direction * watch_offset; LOG(debug) << "Set x86-string fast-forward watchpoint at " << watch_di; bool ok = t->vm()->add_watchpoint(watch_di, 1, WATCH_READWRITE); ASSERT(t, ok) << "Can't even handle one watchpoint???"; ok = t->vm()->add_breakpoint(limit_ip, TRAP_BKPT_INTERNAL); ASSERT(t, ok) << "Failed to add breakpoint"; t->resume_execution(RESUME_CONT, RESUME_WAIT, RESUME_UNLIMITED_TICKS); did_execute = true; ASSERT(t, t->pending_sig() == SIGTRAP); // Grab debug_status before restoring watchpoints, since the latter // clears the debug status bool triggered_watchpoint = t->vm()->notify_watchpoint_fired(t->consume_debug_status()); t->vm()->remove_breakpoint(limit_ip, TRAP_BKPT_INTERNAL); t->vm()->restore_watchpoints(); iterations -= cur_cx - t->regs().cx(); if (!triggered_watchpoint) { // watchpoint didn't fire. 
We must have exited the loop early and // hit the breakpoint. IP will be after the breakpoint instruction. ASSERT(t, t->ip() == limit_ip.increment_by_bkpt_insn_length(t->arch()) && decoded.modifies_flags); // Undo the execution of the breakpoint instruction. Registers tmp = t->regs(); tmp.set_ip(limit_ip); t->set_regs(tmp); } else { watch_offset = decoded.operand_size * (iterations - 1); if (watch_offset > BYTES_COALESCED) { // We fired the watchpoint too early, perhaps because reads through SI // triggered it. Let's just bail out now; better for the caller to // retry // fast_forward_through_instruction than for us to try singlestepping // all the rest of the way. LOG(debug) << "x86-string fast-forward: " << iterations << " iterations to go, but watchpoint hit early; aborted"; return did_execute; } } } LOG(debug) << "x86-string fast-forward: " << iterations << " iterations to go"; // Singlestep through the remaining iterations. while (iterations > 0 && t->ip() == ip) { t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT, RESUME_UNLIMITED_TICKS); did_execute = true; ASSERT(t, t->pending_sig() == SIGTRAP); t->consume_debug_status(); // Watchpoints can fire spuriously because configure_watch_registers // can increase the size of the watched area to conserve watch registers. --iterations; } if (t->ip() != ip) { // We exited the loop early due to flags being modified. ASSERT(t, t->ip() == limit_ip && decoded.modifies_flags); // String instructions that modify flags don't have non-register side // effects, so we can reset registers to effectively unwind the loop. // Then we try rerunning the loop again, adding this state as one to // avoid stepping into. We shouldn't need to do this more than once! ASSERT(t, states_copy.empty()); extra_state_to_avoid = t->regs(); states_copy = states; states_copy.push_back(&extra_state_to_avoid); using_states = &states_copy; t->set_regs(r); } else { LOG(debug) << "x86-string fast-forward done; ip()==" << t->ip(); // Fake singlestep status for trap diagnosis t->replace_debug_status(DS_SINGLESTEP); return did_execute; } } } static bool is_ignorable_prefix(Task* t, uint8_t byte) { if (byte >= 0x40 && byte <= 0x4f) { // REX prefix return t->arch() == x86_64; } switch (byte) { case 0x26: // ES override case 0x2E: // CS override case 0x36: // SS override case 0x3E: // DS override case 0x64: // FS override case 0x65: // GS override case 0x66: // operand-size override case 0x67: // address-size override case 0xF0: // LOCK return true; default: return false; } } static bool is_rep_prefix(uint8_t byte) { return byte == 0xF2 || byte == 0xF3; } static bool is_string_instruction(uint8_t byte) { switch (byte) { case 0xA4: // MOVSB case 0xA5: // MOVSW case 0xA6: // CMPSB case 0xA7: // CMPSW case 0xAA: // STOSB case 0xAB: // STOSW case 0xAC: // LODSB case 0xAD: // LODSW case 0xAE: // SCASB case 0xAF: // SCASW return true; default: return false; } } static int fallible_read_byte(Task* t, remote_ptr ip) { uint8_t byte; if (t->read_bytes_fallible(ip, 1, &byte) == 0) { return -1; } return byte; } static bool is_string_instruction_at(Task* t, remote_code_ptr ip) { bool found_rep = false; remote_ptr bare_ip = ip.to_data_ptr(); while (true) { int byte = fallible_read_byte(t, bare_ip); if (byte < 0) { return false; } else if (is_rep_prefix(byte)) { found_rep = true; } else if (is_string_instruction(byte)) { return found_rep; } else if (!is_ignorable_prefix(t, byte)) { return false; } ++bare_ip; } } static bool is_string_instruction_before(Task* t, remote_code_ptr ip) { remote_ptr 
bare_ip = ip.to_data_ptr(); --bare_ip; int byte = fallible_read_byte(t, bare_ip); if (byte < 0 || !is_string_instruction(byte)) { return false; } while (true) { --bare_ip; int byte = fallible_read_byte(t, bare_ip); if (byte < 0) { return false; } else if (is_rep_prefix(byte)) { return true; } else if (!is_ignorable_prefix(t, byte)) { return false; } } } bool maybe_at_or_after_x86_string_instruction(Task* t) { if (!is_x86ish(t)) { return false; } return is_string_instruction_at(t, t->ip()) || is_string_instruction_before(t, t->ip()); } rr-4.1.0/src/fast_forward.h000066400000000000000000000030251265436462100155710ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_FAST_FORWARD_H_ #define RR_FAST_FORWARD_H_ #include #include "task.h" class Registers; /** * Perform one or more synchronous singlesteps of |t|. Usually just does * one singlestep, except when a singlestep leaves the IP unchanged (i.e. a * single instruction represents a loop, such as an x86 REP-prefixed string * instruction). * * |how| must be either RESUME_SINGLESTEP or RESUME_SYSEMU_SINGLESTEP. * * We always perform at least one singlestep. We stop after a singlestep if * one of the following is true, or will be true after one more singlestep: * -- Any breakpoint or watchpoint has been triggered * -- IP has advanced to the next instruction * -- One of the register states in |states| (a null-terminated list) * has been reached. * * Spurious returns after any singlestep are also allowed. * * This will not add more than one tick to t->tick_count(). * * Returns true if we did a fast-forward, false if we just did one regular * singlestep. */ bool fast_forward_through_instruction( Task* t, ResumeRequest how, const std::vector& states); /** * Return true if the instruction at t->ip(), or the instruction immediately * before t->ip(), could be a REP-prefixed string instruction. It's OK to * return true if it's not really a string instruction (though for performance * reasons, this should be rare). */ bool maybe_at_or_after_x86_string_instruction(Task* t); #endif // RR_FAST_FORWARD_H_ rr-4.1.0/src/generate_rr_page.py000077500000000000000000000031701265436462100166060ustar00rootroot00000000000000#!/usr/bin/env python2 import io import os import sys def write_rr_page(f, is_64, is_replay): # The length of each code sequence must be RR_PAGE_SYSCALL_STUB_SIZE. # The end of each syscall instruction must be at offset # RR_PAGE_SYSCALL_INSTRUCTION_END. if is_64: bytes = bytearray([ 0x0f, 0x05, # syscall 0xc3, # ret ]) else: bytes = bytearray([ 0xcd, 0x80, # int 0x80 0xc3, # ret ]) # traced f.write(bytes) # privileged traced f.write(bytes) # untraced replayed f.write(bytes) if is_replay: # regular untraced syscalls are not executed during replay. # Instead we just emulate success. 
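        # Informative note (added commentary, not original rr code):
        # 0x31 0xc0 encodes `xor %eax,%eax` in both 32-bit and 64-bit
        # mode, so after the patch below the two untraced entries become
        # `xor %eax,%eax; ret`: they clear the syscall return-value
        # register to 0 (success) and return without entering the kernel.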
bytes[0] = 0x31 bytes[1] = 0xc0 # xor %eax,%eax # untraced f.write(bytes) # privileged untraced f.write(bytes) generators_for = { 'rr_page_32': lambda stream: write_rr_page(stream, False, False), 'rr_page_64': lambda stream: write_rr_page(stream, True, False), 'rr_page_32_replay': lambda stream: write_rr_page(stream, False, True), 'rr_page_64_replay': lambda stream: write_rr_page(stream, True, True), } def main(argv): filename = argv[0] base = os.path.basename(filename) if os.access(filename, os.F_OK): with open(filename, 'r') as f: before = f.read() else: before = "" stream = io.BytesIO() generators_for[base](stream) after = stream.getvalue() stream.close() if before != after: with open(filename, 'w') as f: f.write(after) if __name__ == '__main__': main(sys.argv[1:]) rr-4.1.0/src/generate_syscalls.py000077500000000000000000000124551265436462100170320ustar00rootroot00000000000000#!/usr/bin/env python2 import assembly_templates import StringIO import os import string import sys import syscalls def write_syscall_enum(f, arch): f.write("enum Syscalls {\n") undefined_syscall = -1 for name, obj in sorted(syscalls.all(), key=lambda x: getattr(x[1], arch)): syscall_number = getattr(obj, arch) if syscall_number is not None: enum_number = syscall_number else: enum_number = undefined_syscall undefined_syscall -= 1 f.write(" %s = %d,\n" % (name, enum_number)) f.write(" SYSCALL_COUNT,\n") f.write("};\n") f.write("\n") def write_syscall_enum_for_tests(f, arch): f.write("enum Syscalls {\n") undefined_syscall = -1 for name, obj in sorted(syscalls.all(), key=lambda x: getattr(x[1], arch)): syscall_number = getattr(obj, arch) if syscall_number is not None: enum_number = syscall_number else: enum_number = undefined_syscall undefined_syscall -= 1 f.write(" RR_%s = %d,\n" % (name, enum_number)) f.write("};\n") f.write("\n") def write_syscallname_arch(f): f.write("template static std::string syscallname_arch(int syscall);\n") f.write("\n"); for specializer, arch in [("X86Arch", "x86"), ("X64Arch", "x64")]: f.write("template <> std::string syscallname_arch<%s>(int syscall) {\n" % specializer) f.write(" switch (syscall) {\n"); def write_case(name): f.write(" case %(specializer)s::%(syscall)s: return \"%(syscall)s\";\n" % { 'specializer': specializer, 'syscall': name }) for name, _ in syscalls.for_arch(arch): write_case(name) f.write(" default: {") f.write(" char buf[100];") f.write(" sprintf(buf, \"\", syscall);") f.write(" return buf;\n") f.write(" }\n") f.write(" }\n") f.write("}\n") f.write("\n") def write_syscall_record_cases(f): def write_recorder_for_arg(syscall, arg): arg_descriptor = getattr(syscall, 'arg' + str(arg), None) if isinstance(arg_descriptor, str): f.write(" syscall_state.reg_parameter<%s>(%d);\n" % (arg_descriptor, arg)) for name, obj in syscalls.all(): # Irregular syscalls will be handled by hand-written code elsewhere. 
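        # As an illustration (hypothetical names, not taken from
        # syscalls.py): a regular syscall "examplecall" whose arg2
        # descriptor is the string 'timespec' would make the loop below
        # emit roughly:
        #   case Arch::examplecall:
        #     syscall_state.reg_parameter<timespec>(2);
        #     return PREVENT_SWITCH;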
if isinstance(obj, syscalls.RegularSyscall): f.write(" case Arch::%s:\n" % name) for arg in range(1,6): write_recorder_for_arg(obj, arg) f.write(" return PREVENT_SWITCH;\n") has_syscall = string.Template("""inline bool has_${syscall}_syscall(SupportedArch arch) { switch (arch) { case x86: return X86Arch::${syscall} >= 0; case x86_64: return X64Arch::${syscall} >= 0; default: assert(0 && "unsupported architecture"); } } """) is_syscall = string.Template("""inline bool is_${syscall}_syscall(int syscallno, SupportedArch arch) { switch (arch) { case x86: return syscallno >= 0 && syscallno == X86Arch::${syscall}; case x86_64: return syscallno >= 0 && syscallno == X64Arch::${syscall}; default: assert(0 && "unsupported architecture"); } } """) syscall_number = string.Template("""inline int syscall_number_for_${syscall}(SupportedArch arch) { switch (arch) { case x86: assert(X86Arch::${syscall} >= 0); return X86Arch::${syscall}; case x86_64: assert(X64Arch::${syscall} >= 0); return X64Arch::${syscall}; default: assert(0 && "unsupported architecture"); } } """) def write_syscall_helper_functions(f): def write_helpers(syscall): subs = { 'syscall': syscall } f.write(has_syscall.safe_substitute(subs)) f.write(is_syscall.safe_substitute(subs)) f.write(syscall_number.safe_substitute(subs)) for name, obj in syscalls.all(): write_helpers(name) def write_check_syscall_numbers(f): for name, obj in syscalls.all(): # XXX hard-coded to x86 currently if not obj.x86: continue f.write("""static_assert(X86Arch::%s == SYS_%s, "Incorrect syscall number for %s");\n""" % (name, name, name)) generators_for = { 'AssemblyTemplates': lambda f: assembly_templates.generate(f), 'CheckSyscallNumbers': write_check_syscall_numbers, 'SyscallEnumsX86': lambda f: write_syscall_enum(f, 'x86'), 'SyscallEnumsX64': lambda f: write_syscall_enum(f, 'x64'), 'SyscallEnumsForTestsX86': lambda f: write_syscall_enum_for_tests(f, 'x86'), 'SyscallEnumsForTestsX64': lambda f: write_syscall_enum_for_tests(f, 'x64'), 'SyscallnameArch': write_syscallname_arch, 'SyscallRecordCase': write_syscall_record_cases, 'SyscallHelperFunctions': write_syscall_helper_functions, } def main(argv): filename = argv[0] base, extension = os.path.splitext(os.path.basename(filename)) if os.access(filename, os.F_OK): with open(filename, 'r') as f: before = f.read() else: before = "" stream = StringIO.StringIO() generators_for[base](stream) after = stream.getvalue() stream.close() if before != after: with open(filename, 'w') as f: f.write(after) if __name__ == '__main__': main(sys.argv[1:]) rr-4.1.0/src/kernel_abi.cc000066400000000000000000000027721265436462100153510ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "kernel_abi.h" #include #include "task.h" using namespace std; static const uint8_t int80_insn[] = { 0xcd, 0x80 }; static const uint8_t sysenter_insn[] = { 0x0f, 0x34 }; static const uint8_t syscall_insn[] = { 0x0f, 0x05 }; namespace rr { bool is_at_syscall_instruction(Task* t, remote_code_ptr ptr) { vector code = t->read_mem(ptr.to_data_ptr(), 2); switch (t->arch()) { case x86: return memcmp(code.data(), int80_insn, sizeof(int80_insn)) == 0 || memcmp(code.data(), sysenter_insn, sizeof(sysenter_insn)) == 0; case x86_64: return memcmp(code.data(), syscall_insn, sizeof(syscall_insn)) == 0 || memcmp(code.data(), sysenter_insn, sizeof(sysenter_insn)) == 0; default: assert(0 && "Need to define syscall instructions"); return false; } } vector syscall_instruction(SupportedArch arch) { switch (arch) 
{ case x86: return vector(int80_insn, int80_insn + sizeof(int80_insn)); case x86_64: return vector(syscall_insn, syscall_insn + sizeof(syscall_insn)); default: assert(0 && "Need to define syscall instruction"); return vector(); } } ssize_t syscall_instruction_length(SupportedArch arch) { switch (arch) { case x86: case x86_64: return 2; default: assert(0 && "Need to define syscall instruction length"); return 0; } } } rr-4.1.0/src/kernel_abi.h000066400000000000000000001133431265436462100152100ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_KERNEL_ABI_H #define RR_KERNEL_ABI_H // Include remote_ptr.h first since it (indirectly) requires a definition of // ERANGE, which other headers below #undef :-( #include "remote_ptr.h" // Get all the kernel definitions so we can verify our alternative versions. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include class remote_code_ptr; class Task; enum SupportedArch { x86, x86_64, SupportedArch_MAX = x86_64 }; namespace rr { #if defined(__i386__) const SupportedArch RR_NATIVE_ARCH = SupportedArch::x86; #elif defined(__x86_64__) const SupportedArch RR_NATIVE_ARCH = SupportedArch::x86_64; #else #error need to define new SupportedArch enum #endif template struct Verifier { // Optimistically say we are the same size. static const bool same_size = true; }; template struct Verifier { static const bool same_size = sizeof(system_type) == sizeof(rr_type); }; template struct Verifier { // Prevent us from accidentally verifying the size of rr's structure // with itself or (unlikely) the system's structure with itself. }; #define RR_VERIFY_TYPE_ARCH(arch_, system_type_, rr_type_) \ static_assert(Verifier::same_size, \ "type " #system_type_ " not correctly defined"); // For instances where the system type and the rr type are named differently. #define RR_VERIFY_TYPE_EXPLICIT(system_type_, rr_type_) \ RR_VERIFY_TYPE_ARCH(arch_, system_type_, rr_type_) // For instances where the system type and the rr type are named identically. #define RR_VERIFY_TYPE(type_) RR_VERIFY_TYPE_EXPLICIT(::type_, type_) struct KernelConstants { static const ::size_t SIGINFO_MAX_SIZE = 128; // These types are the same size everywhere. typedef int32_t pid_t; typedef uint32_t uid_t; typedef uint32_t gid_t; typedef uint32_t socklen_t; typedef uint64_t dev_t; typedef uint32_t mode_t; typedef int32_t __kernel_timer_t; }; // These duplicate the matching F_* constants for commands for fcntl, with two // small differences: we unconditionally define the *64 variants to their values // for 32-bit systems. This change enables us to always use our constants in // switch cases without worrying about duplicated case values and makes dealing // with 32-bit and 64-bit tracees in the same rr process simpler. // // The other small difference is that we define these constants without the F_ // prefix, so as to not run afoul of the C preprocessor. 
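// As a concrete illustration (added note, not original rr code): with
// 64-bit glibc headers F_GETLK64 is defined to the same value as F_GETLK,
// so a switch containing both `case F_GETLK:` and `case F_GETLK64:` arms
// fails to compile with a duplicate-case error. The constants below keep
// GETLK (5) and GETLK64 (12) distinct on every host, so one switch can
// decode fcntl commands from both 32-bit and 64-bit tracees:
//
//   switch (cmd) {
//     case FcntlConstants::GETLK:   // from a 32-bit tracee
//     case FcntlConstants::GETLK64: // from a 64-bit tracee
//       /* handle both lock queries here */
//       break;
//   }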
struct FcntlConstants { enum FcntlOperation { DUPFD = 0, GETFD = 1, SETFD = 2, GETFL = 3, SETFL = 4, GETLK = 5, SETLK = 6, SETLKW = 7, SETOWN = 8, GETOWN = 9, SETSIG = 10, GETSIG = 11, GETLK64 = 12, SETLK64 = 13, SETLKW64 = 14, SETOWN_EX = 15, GETOWN_EX = 16, // Linux-specific operations DUPFD_CLOEXEC = 0x400 + 6, ADD_SEALS = 0x400 + 9 }; }; struct WordSize32Defs : public KernelConstants { static const ::size_t SIGINFO_PAD_SIZE = (SIGINFO_MAX_SIZE / sizeof(int32_t)) - 3; typedef int16_t signed_short; typedef uint16_t unsigned_short; typedef int32_t signed_int; typedef uint32_t unsigned_int; typedef int32_t signed_long; typedef uint32_t unsigned_long; typedef int32_t signed_word; typedef uint32_t unsigned_word; typedef uint32_t size_t; typedef int32_t ssize_t; // These really only exist as proper abstractions so that adding x32 // (x86-64's ILP32 ABI) support is relatively easy. typedef int32_t syscall_slong_t; typedef uint32_t syscall_ulong_t; typedef int32_t sigchld_clock_t; typedef uint32_t __statfs_word; static const size_t elfclass = ELFCLASS32; typedef Elf32_Ehdr ElfEhdr; typedef Elf32_Shdr ElfShdr; typedef Elf32_Sym ElfSym; }; struct WordSize64Defs : public KernelConstants { static const ::size_t SIGINFO_PAD_SIZE = (SIGINFO_MAX_SIZE / sizeof(int32_t)) - 4; typedef int16_t signed_short; typedef uint16_t unsigned_short; typedef int32_t signed_int; typedef uint32_t unsigned_int; typedef int64_t signed_long; typedef uint64_t unsigned_long; typedef int64_t signed_word; typedef uint64_t unsigned_word; typedef uint64_t size_t; typedef int64_t ssize_t; // These really only exist as proper abstractions so that adding x32 // (x86-64's ILP32 ABI) support is relatively easy. typedef int64_t syscall_slong_t; typedef uint64_t syscall_ulong_t; typedef int64_t sigchld_clock_t; typedef signed_long __statfs_word; static const size_t elfclass = ELFCLASS64; typedef Elf64_Ehdr ElfEhdr; typedef Elf64_Shdr ElfShdr; typedef Elf64_Sym ElfSym; }; /** * Structs defined in BaseArch and its derivatives should not contain any * holes. Holes can cause divergence if such structs are copied from rr to * the tracee. 
*/ template struct BaseArch : public wordsize, public FcntlConstants { static SupportedArch arch() { return arch_; } typedef typename wordsize::syscall_slong_t syscall_slong_t; typedef typename wordsize::syscall_ulong_t syscall_ulong_t; typedef typename wordsize::signed_int signed_int; typedef typename wordsize::unsigned_int unsigned_int; typedef typename wordsize::signed_short signed_short; typedef typename wordsize::unsigned_short unsigned_short; typedef typename wordsize::signed_long signed_long; typedef typename wordsize::unsigned_long unsigned_long; typedef typename wordsize::unsigned_word unsigned_word; typedef typename wordsize::size_t size_t; typedef typename wordsize::ssize_t ssize_t; typedef typename wordsize::sigchld_clock_t sigchld_clock_t; typedef typename wordsize::__statfs_word __statfs_word; typedef syscall_slong_t time_t; typedef syscall_slong_t off_t; typedef syscall_slong_t blkcnt_t; typedef syscall_slong_t blksize_t; typedef syscall_ulong_t rlim_t; typedef syscall_ulong_t fsblkcnt_t; typedef syscall_ulong_t fsfilcnt_t; typedef syscall_ulong_t ino_t; typedef syscall_ulong_t nlink_t; typedef int64_t off64_t; typedef uint64_t rlim64_t; typedef uint64_t ino64_t; typedef int64_t blkcnt64_t; typedef syscall_slong_t clock_t; typedef signed_int __kernel_key_t; typedef signed_int __kernel_uid32_t; typedef signed_int __kernel_gid32_t; typedef unsigned_int __kernel_mode_t; typedef unsigned_long __kernel_ulong_t; typedef signed_long __kernel_long_t; typedef __kernel_long_t __kernel_time_t; typedef __kernel_long_t __kernel_suseconds_t; typedef signed_int __kernel_pid_t; typedef int64_t __kernel_loff_t; typedef unsigned_int __u32; template struct ptr { typedef T Referent; unsigned_word val; template operator remote_ptr() const { return rptr(); } /** * Sometimes you need to call rptr() directly to resolve ambiguous * overloading. */ remote_ptr rptr() const { return remote_ptr(val); } template ptr& operator=(remote_ptr p) { remote_ptr pt = p; val = pt.as_int(); assert(val == pt.as_int()); return *this; } operator bool() const { return val; } static size_t referent_size() { return sizeof(T); } }; union sigval_t { signed_int sival_int; ptr sival_ptr; }; struct sockaddr { unsigned_short sa_family; char sa_data[14]; }; RR_VERIFY_TYPE(sockaddr); struct sockaddr_un { unsigned_short sun_family; char sun_path[108]; }; RR_VERIFY_TYPE(sockaddr_un); struct timeval { __kernel_time_t tv_sec; __kernel_suseconds_t tv_usec; }; RR_VERIFY_TYPE(timeval); struct timespec { __kernel_time_t tv_sec; syscall_slong_t tv_nsec; }; RR_VERIFY_TYPE(timespec); struct pollfd { signed_int fd; signed_short events; signed_short revents; }; RR_VERIFY_TYPE(pollfd); struct iovec { ptr iov_base; size_t iov_len; }; RR_VERIFY_TYPE(iovec); struct msghdr { ptr msg_name; socklen_t msg_namelen; char _padding[sizeof(ptr) - sizeof(socklen_t)]; ptr msg_iov; size_t msg_iovlen; ptr msg_control; size_t msg_controllen; signed_int msg_flags; }; RR_VERIFY_TYPE(msghdr); struct cmsghdr { size_t cmsg_len; int cmsg_level; int cmsg_type; }; RR_VERIFY_TYPE(cmsghdr); struct mmsghdr { msghdr msg_hdr; unsigned_int msg_len; }; RR_VERIFY_TYPE(mmsghdr); // x86-64 is the only architecture to pack this structure, and it does // so to make the x86 and x86-64 definitions identical. So even if // we're compiling on an x86-64 host that will support recording // 32-bit and 64-bit programs, this is the correct way to declare // epoll_event for both kinds of recordees. // See . 
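  // Worked example of the padding issue (added note, not original rr
  // code): epoll_data below contains a uint64_t, so without packing a
  // 64-bit compiler would align it to 8 bytes, giving offsetof(data) == 8
  // and sizeof(epoll_event) == 16. The kernel ABI, like the natural x86
  // layout, has offsetof(data) == 4 and sizeof(epoll_event) == 12, which
  // is what RR_EPOLL_PACKED preserves on x86-64 builds.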
#if defined(__x86_64__)
#define RR_EPOLL_PACKED __attribute__((packed))
#else
#define RR_EPOLL_PACKED
#endif
  struct epoll_event {
    union epoll_data {
      ptr<void> ptr_;
      signed_int fd;
      uint32_t u32;
      uint64_t u64;
    };

    uint32_t events;
    epoll_data data;
  } RR_EPOLL_PACKED;
  RR_VERIFY_TYPE(epoll_event);
#undef RR_EPOLL_PACKED

  struct rusage {
    timeval ru_utime;
    timeval ru_stime;
    signed_long ru_maxrss;
    signed_long ru_ixrss;
    signed_long ru_idrss;
    signed_long ru_isrss;
    signed_long ru_minflt;
    signed_long ru_majflt;
    signed_long ru_nswap;
    signed_long ru_inblock;
    signed_long ru_oublock;
    signed_long ru_msgsnd;
    signed_long ru_msgrcv;
    signed_long ru_nsignals;
    signed_long ru_nvcsw;
    signed_long ru_nivcsw;
  };
  RR_VERIFY_TYPE(rusage);

  struct siginfo_t {
    signed_int si_signo;
    signed_int si_errno;
    signed_int si_code;
    union {
      signed_int padding[wordsize::SIGINFO_PAD_SIZE];
      // <bits/siginfo.h> #defines all the field names below due to X/Open
      // requirements, so we append '_'.
      struct {
        pid_t si_pid_;
        uid_t si_uid_;
      } _kill;
      struct {
        signed_int si_tid_;
        signed_int si_overrun_;
        sigval_t si_sigval_;
      } _timer;
      struct {
        pid_t si_pid_;
        uid_t si_uid_;
        sigval_t si_sigval_;
      } _rt;
      struct {
        pid_t si_pid_;
        uid_t si_uid_;
        signed_int si_status_;
        sigchld_clock_t si_utime_;
        sigchld_clock_t si_stime_;
      } _sigchld;
      struct {
        ptr<void> si_addr_;
        signed_short si_addr_lsb_;
      } _sigfault;
      struct {
        signed_long si_band_;
        signed_int si_fd_;
      } _sigpoll;
      struct {
        ptr<void> _call_addr;
        signed_int _syscall;
        unsigned_int _arch;
      } _sigsys;
    } _sifields;
  };
  RR_VERIFY_TYPE_EXPLICIT(siginfo_t, ::siginfo_t)

  typedef unsigned char cc_t;
  typedef unsigned_int speed_t;
  typedef unsigned_int tcflag_t;

  struct termios {
    tcflag_t c_iflag;
    tcflag_t c_oflag;
    tcflag_t c_cflag;
    tcflag_t c_lflag;
    cc_t c_line;
    cc_t c_cc[32];
    char _padding[3];
    speed_t c_ispeed;
    speed_t c_ospeed;
  };
  RR_VERIFY_TYPE(termios);

  struct winsize {
    unsigned_short ws_row;
    unsigned_short ws_col;
    unsigned_short ws_xpixel;
    unsigned_short ws_ypixel;
  };
  RR_VERIFY_TYPE(winsize);

  struct ipc64_perm {
    __kernel_key_t key;
    __kernel_uid32_t uid;
    __kernel_gid32_t gid;
    __kernel_uid32_t cuid;
    __kernel_gid32_t cgid;
    __kernel_mode_t mode;
    unsigned_short seq;
    unsigned_short __pad2;
    char __pad3[sizeof(__kernel_ulong_t) - 2 * sizeof(unsigned_short)];
    __kernel_ulong_t unused1;
    __kernel_ulong_t unused2;
  };
  RR_VERIFY_TYPE(ipc64_perm);

  struct msqid64_ds {
    ipc64_perm msg_perm;
    // These msg*time fields are really __kernel_time_t plus
    // appropriate padding. We don't touch the fields, though.
    //
    // We do, however, suffix them with _only_little_endian to
    // urge anybody who does touch them to make sure the right
    // thing is done for big-endian systems.
uint64_t msg_stime_only_little_endian; uint64_t msg_rtime_only_little_endian; uint64_t msg_ctime_only_little_endian; __kernel_ulong_t msg_cbytes; __kernel_ulong_t msg_qnum; __kernel_ulong_t msg_qbytes; __kernel_pid_t msg_lspid; __kernel_pid_t msg_lrpid; __kernel_ulong_t unused1; __kernel_ulong_t unused2; }; RR_VERIFY_TYPE(msqid64_ds); struct msginfo { signed_int msgpool; signed_int msgmap; signed_int msgmax; signed_int msgmnb; signed_int msgmni; signed_int msgssz; signed_int msgtql; unsigned_short msgseg; }; RR_VERIFY_TYPE(msginfo); struct shmid64_ds { ipc64_perm shm_perm; size_t shm_segsz; uint64_t shm_atime_only_little_endian; uint64_t shm_dtime_only_little_endian; uint64_t shm_ctime_only_little_endian; __kernel_pid_t shm_cpid; __kernel_pid_t shm_lpid; __kernel_ulong_t shm_nattch; __kernel_ulong_t unused4; __kernel_ulong_t unused5; }; RR_VERIFY_TYPE(shmid64_ds); struct shminfo64 { __kernel_ulong_t shmmax; __kernel_ulong_t shmmin; __kernel_ulong_t shmmni; __kernel_ulong_t shmseg; __kernel_ulong_t shmall; __kernel_ulong_t unused1; __kernel_ulong_t unused2; __kernel_ulong_t unused3; __kernel_ulong_t unused4; }; RR_VERIFY_TYPE(shminfo64); struct shm_info { int used_ids; char __pad[sizeof(__kernel_ulong_t) - sizeof(int)]; __kernel_ulong_t shm_tot; __kernel_ulong_t shm_rss; __kernel_ulong_t shm_swp; __kernel_ulong_t swap_attempts; __kernel_ulong_t swap_successes; }; RR_VERIFY_TYPE(shm_info); struct semid64_ds { ipc64_perm sem_perm; __kernel_time_t sem_otime; __kernel_ulong_t __unused1; __kernel_time_t sem_ctime; __kernel_ulong_t __unused2; __kernel_ulong_t sem_nsems; __kernel_ulong_t __unused3; __kernel_ulong_t __unused4; }; RR_VERIFY_TYPE(semid64_ds); struct seminfo { int semmap; int semmni; int semmns; int semmnu; int semmsl; int semopm; int semume; int semusz; int semvmx; int semaem; }; RR_VERIFY_TYPE(seminfo); // The clone(2) syscall has four (!) different calling conventions, // depending on what architecture it's being compiled for. We describe // the orderings for x86oids here. enum CloneParameterOrdering { FlagsStackParentTLSChild, FlagsStackParentChildTLS, }; // Despite the clone(2) manpage describing the clone syscall as taking a // pointer to |struct user_desc*|, the actual kernel interface treats the // TLS value as a opaque cookie, which architectures are then free to do // whatever they like with. See for instance the definition of TLS_VALUE // in nptl/sysdeps/pthread/createthread.c in the glibc source. We need to // describe what the architecture uses so we can record things accurately. enum CloneTLSType { // |struct user_desc*| UserDescPointer, // This is the default choice for TLS_VALUE in the glibc source. PthreadStructurePointer, }; struct user_desc { unsigned_int entry_number; unsigned_int base_addr; unsigned_int limit; unsigned_int seg_32bit : 1; unsigned_int contents : 2; unsigned_int read_exec_only : 1; unsigned_int limit_in_pages : 1; unsigned_int seg_not_present : 1; unsigned_int useable : 1; unsigned_int lm : 1; }; RR_VERIFY_TYPE(user_desc); struct __user_cap_header_struct { __u32 version; int pid; }; RR_VERIFY_TYPE(__user_cap_header_struct); struct __user_cap_data_struct { __u32 effective; __u32 permitted; __u32 inheritable; }; RR_VERIFY_TYPE(__user_cap_data_struct); // This structure uses fixed-size fields, but the padding rules // for 32-bit vs. 64-bit architectures dictate that it be // defined in full. 
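  // For instance (added note, not original rr code): dqblk below ends in
  // a lone uint32_t, so a 64-bit compiler rounds sizeof(dqblk) up to 72
  // while a 32-bit compiler leaves it at 68. Spelling out every field
  // inside this per-arch template lets each instantiation pick up the
  // correct trailing padding automatically.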
struct dqblk { uint64_t dqb_bhardlimit; uint64_t dqb_bsoftlimit; uint64_t dqb_curspace; uint64_t dqb_ihardlimit; uint64_t dqb_isoftlimit; uint64_t dqb_curinodes; uint64_t dqb_btime; uint64_t dqb_itime; uint32_t dqb_valid; }; RR_VERIFY_TYPE(dqblk); struct dqinfo { uint64_t dqi_bgrace; uint64_t dqi_igrace; uint32_t dqi_flags; uint32_t dqi_valid; }; RR_VERIFY_TYPE(dqinfo); struct ifmap { unsigned_long mem_start; unsigned_long mem_end; unsigned_short base_addr; unsigned char irq; unsigned char dma; unsigned char port; }; RR_VERIFY_TYPE(ifmap); struct if_settings { unsigned_int type; unsigned_int size; union { ptr raw_hdlc; ptr cisco; ptr fr; ptr fr_pvc; ptr fr_pvc_info; ptr sync; ptr tel; } ifs_ifsu; }; RR_VERIFY_TYPE(if_settings); struct ifreq { union { char ifrn_name[16]; } ifr_ifrn; union { sockaddr ifru_addr; sockaddr ifru_dstaddr; sockaddr ifru_broadaddr; sockaddr ifru_netmask; sockaddr ifru_hwaddr; signed_short ifru_flags; signed_int ifru_ivalue; signed_int ifru_mtu; ifmap ifru_map; char ifru_slave[16]; char ifru_newname[16]; ptr ifru_data; if_settings ifru_settings; } ifr_ifru; }; RR_VERIFY_TYPE(ifreq); struct ifconf { signed_int ifc_len; char __pad[sizeof(ptr) - sizeof(int)]; union { ptr ifcu_buf; ptr ifcu_req; } ifc_ifcu; }; RR_VERIFY_TYPE(ifconf); struct iw_param { int32_t value; uint8_t fixed; uint8_t disabled; uint16_t flags; }; RR_VERIFY_TYPE(iw_param); struct iw_point { ptr pointer; uint16_t length; uint16_t flags; }; RR_VERIFY_TYPE(iw_point); struct iw_freq { int32_t m; int16_t e; uint8_t i; uint8_t flags; }; RR_VERIFY_TYPE(iw_freq); struct iw_quality { uint8_t qual; uint8_t level; uint8_t noise; uint8_t updated; }; RR_VERIFY_TYPE(iw_quality); union iwreq_data { char name[16]; iw_point essid; iw_param nwid; iw_freq freq; iw_param sens; iw_param bitrate; iw_param txpower; iw_param rts; iw_param frag; uint32_t mode; iw_param retry; iw_point encoding; iw_param power; iw_quality qual; sockaddr ap_addr; sockaddr addr; iw_param param; iw_point data; }; RR_VERIFY_TYPE(iwreq_data); struct iwreq { union { char ifrn_name[16]; } ifr_ifrn; iwreq_data u; }; RR_VERIFY_TYPE(iwreq); struct ethtool_cmd { uint32_t cmd; uint32_t supported; uint32_t advertising; uint16_t speed; uint8_t duplex; uint8_t port; uint8_t phy_address; uint8_t transceiver; uint8_t autoneg; uint8_t mdio_support; uint32_t maxtxpkt; uint32_t maxrxpkt; uint16_t speed_hi; uint8_t eth_tp_mdix; uint8_t eth_tp_mdix_ctrl; uint32_t lp_advertising; uint32_t reserved[2]; }; RR_VERIFY_TYPE(ethtool_cmd); struct flock { signed_short l_type; signed_short l_whence; char __pad[sizeof(off_t) - 2 * sizeof(short)]; off_t l_start; off_t l_len; pid_t l_pid; }; RR_VERIFY_TYPE(flock); struct flock64 { signed_short l_type; signed_short l_whence; // No padding on 32-bit, 4 bytes of padding on 64-bit char __pad[sizeof(uint32_t) - 2 * sizeof(short)]; uint64_t l_start; uint64_t l_len; pid_t l_pid; }; RR_VERIFY_TYPE(flock64); struct f_owner_ex { signed_int type; __kernel_pid_t pid; }; RR_VERIFY_TYPE(f_owner_ex); // Define various structures that package up syscall arguments. // The types of their members are part of the ABI, and defining // them here makes their definitions more concise. 
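  // Illustrative sketch (added note with assumed helper names, not
  // original rr code): on x86, socketcall(2) passes a pointer to a block
  // shaped like one of these structs, so a recorder can fetch the
  // arguments with one typed read from tracee memory, e.g.:
  //
  //   auto args = t->read_mem(remote_ptr<accept_args>(t->regs().arg2()));
  //   handle_fd(args.sockfd);
  //
  // The explicit __pad members keep the 32-bit and 64-bit layouts free of
  // the invisible holes warned about above BaseArch.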
struct accept_args { signed_int sockfd; char __pad[sizeof(ptr) - sizeof(int)]; ptr addr; ptr addrlen; }; struct accept4_args : public accept_args { signed_long flags; }; struct getsockname_args { signed_int sockfd; char __pad[sizeof(ptr) - sizeof(int)]; ptr addr; ptr addrlen; }; struct getsockopt_args { signed_int sockfd; signed_int level; signed_int optname; char __pad[sizeof(ptr) - sizeof(int)]; ptr optval; ptr optlen; }; struct recv_args { signed_int sockfd; char __pad[sizeof(ptr) - sizeof(int)]; ptr buf; size_t len; signed_int flags; }; struct recvfrom_args { signed_long sockfd; ptr buf; size_t len; signed_long flags; ptr src_addr; ptr addrlen; }; struct recvmsg_args { signed_int fd; char __pad[sizeof(ptr) - sizeof(int)]; ptr msg; signed_int flags; }; struct recvmmsg_args { signed_int sockfd; char __pad[sizeof(ptr) - sizeof(int)]; ptr msgvec; unsigned_int vlen; unsigned_int flags; ptr timeout; }; struct sendmsg_args { signed_int fd; char __pad[sizeof(ptr) - sizeof(int)]; ptr msg; signed_int flags; }; struct sendmmsg_args { signed_int sockfd; char __pad[sizeof(ptr) - sizeof(int)]; ptr msgvec; unsigned_int vlen; unsigned_int flags; }; struct socketpair_args { signed_int domain; signed_int type; signed_int protocol; char __pad[sizeof(ptr) - sizeof(int)]; ptr sv; // int sv[2] }; // All architectures have an mmap syscall, but it has architecture-specific // calling semantics. We describe those here, and specializations need to // indicate which semantics they use. enum MmapCallingSemantics { StructArguments, // x86-ish, packaged into mmap_args, below RegisterArguments, // arguments passed in registers, the offset // is assumed to be in bytes, not in pages. }; struct mmap_args { ptr addr; size_t len; signed_int prot; signed_int flags; signed_int fd; char __pad[sizeof(off_t) - sizeof(int)]; off_t offset; }; // All architectures have a select syscall, but like mmap, there are two // different calling styles: one that packages the args into a structure, // and one that handles the args in registers. (Architectures using the // first style, like the x86, sometimes support the register-args version // as a separate syscall.) // // (Yes, we'd like to call these StructArguments and RegisterArguments, but // that would conflict with MmapCallingSemantics, above.) enum SelectCallingSemantics { SelectStructArguments, SelectRegisterArguments, }; static const size_t MAX_FDS = 1024; struct fd_set { unsigned_long fds_bits[MAX_FDS / (8 * sizeof(unsigned_long))]; }; struct select_args { signed_int n_fds; char __pad[sizeof(ptr) - sizeof(int)]; ptr read_fds; ptr write_fds; ptr except_fds; ptr timeout; }; /** * Some ipc calls require 7 params, so two of them are stashed into * one of these structs and a pointer to this is passed instead. */ struct ipc_kludge_args { ptr msgbuf; signed_long msgtype; }; struct __sysctl_args { ptr name; signed_int nlen; char __pad[sizeof(ptr) - sizeof(int)]; ptr oldval; ptr oldlenp; ptr newval; ptr newlen; unsigned_long __unused[4]; }; RR_VERIFY_TYPE(__sysctl_args); typedef struct { unsigned_long __val[1024 / (8 * sizeof(unsigned_long))]; } __sigset_t; typedef __sigset_t sigset_t; RR_VERIFY_TYPE(sigset_t); struct kernel_sigaction { ptr k_sa_handler; unsigned_long sa_flags; ptr sa_restorer; sigset_t sa_mask; }; // The 'size' parameter to pass to rt_sigaction. Only this value works, // even though sizeof(sigset_t) > 8 (it's actually 128 with kernel 3.16, // as above). 
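  // (Added note, not original rr code: the kernel rejects rt_sigaction
  // with EINVAL unless sigsetsize equals its own sizeof(sigset_t), which
  // is _NSIG/8 == 64/8 == 8 bytes on x86 and x86-64; glibc's much larger
  // userspace sigset_t is simply cut down to that at the syscall
  // boundary.)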
enum { sigaction_sigset_size = 8 }; struct tms { clock_t tms_utime; clock_t tms_stime; clock_t tms_cutime; clock_t tms_cstime; }; RR_VERIFY_TYPE(tms); struct rlimit { rlim_t rlim_cur; rlim_t rlim_max; }; RR_VERIFY_TYPE(rlimit); struct rlimit64 { rlim64_t rlim_cur; rlim64_t rlim_max; }; RR_VERIFY_TYPE(rlimit64); struct timezone { int tz_minuteswest; int tz_dsttime; }; RR_VERIFY_TYPE_EXPLICIT(struct ::timezone, timezone); struct statfs { __statfs_word f_type; __statfs_word f_bsize; __statfs_word f_blocks; __statfs_word f_bfree; __statfs_word f_bavail; __statfs_word f_files; __statfs_word f_ffree; struct { int __val[2]; } f_fsid; __statfs_word f_namelen; __statfs_word f_frsize; __statfs_word f_flags; __statfs_word f_spare[4]; }; RR_VERIFY_TYPE_EXPLICIT(struct ::statfs, statfs); struct statfs64 { __statfs_word f_type; __statfs_word f_bsize; uint64_t f_blocks; uint64_t f_bfree; uint64_t f_bavail; uint64_t f_files; uint64_t f_ffree; struct { int __val[2]; } f_fsid; __statfs_word f_namelen; __statfs_word f_frsize; __statfs_word f_flags; __statfs_word f_spare[4]; }; RR_VERIFY_TYPE_EXPLICIT(struct ::statfs64, statfs64); struct itimerval { timeval it_interval; timeval it_value; }; RR_VERIFY_TYPE(itimerval); struct itimerspec { timespec it_interval; timespec it_value; }; RR_VERIFY_TYPE(itimerspec); typedef struct sigaltstack { ptr ss_sp; int ss_flags; char __pad[sizeof(size_t) - sizeof(int)]; size_t ss_size; } stack_t; RR_VERIFY_TYPE(stack_t); struct sysinfo { __kernel_long_t uptime; __kernel_ulong_t loads[3]; __kernel_ulong_t totalram; __kernel_ulong_t freeram; __kernel_ulong_t sharedram; __kernel_ulong_t bufferram; __kernel_ulong_t totalswap; __kernel_ulong_t freeswap; uint16_t procs; uint16_t pad; char __pad[sizeof(__kernel_ulong_t) - 2 * sizeof(uint16_t)]; __kernel_ulong_t totalhigh; __kernel_ulong_t freehigh; uint32_t mem_unit; char _f[20 - 2 * sizeof(__kernel_ulong_t) - sizeof(uint32_t)]; }; RR_VERIFY_TYPE_EXPLICIT(struct ::sysinfo, sysinfo); static const ::size_t UTSNAME_LENGTH = 65; struct utsname { char sysname[UTSNAME_LENGTH]; char nodename[UTSNAME_LENGTH]; char release[UTSNAME_LENGTH]; char version[UTSNAME_LENGTH]; char machine[UTSNAME_LENGTH]; char domainname[UTSNAME_LENGTH]; }; RR_VERIFY_TYPE(utsname); struct sched_param { int __sched_priority; }; RR_VERIFY_TYPE(sched_param); static void* cmsg_data(cmsghdr* cmsg) { return cmsg + 1; } static size_t cmsg_align(size_t len) { return (len + sizeof(size_t) - 1) & ~(sizeof(size_t) - 1); } static size_t cmsg_space(size_t len) { return cmsg_align(sizeof(cmsghdr)) + cmsg_align(len); } static size_t cmsg_len(size_t len) { return cmsg_align(sizeof(cmsghdr)) + len; } struct v4l2_timecode { uint32_t type; uint32_t flags; uint8_t frames; uint8_t seconds; uint8_t minutes; uint8_t hours; uint8_t userbits[4]; }; RR_VERIFY_TYPE(v4l2_timecode); struct v4l2_buffer { uint32_t index; uint32_t type; uint32_t bytesused; uint32_t flags; uint32_t field; char __pad[sizeof(__kernel_ulong_t) - sizeof(uint32_t)]; struct timeval timestamp; struct v4l2_timecode timecode; uint32_t sequence; uint32_t memory; union { uint32_t offset; unsigned_long userptr; ptr planes; int32_t fd; } m; uint32_t length; uint32_t reserved2; uint32_t reserved; }; RR_VERIFY_TYPE(v4l2_buffer); struct sock_filter { uint16_t code; uint8_t jt; uint8_t jf; uint32_t k; }; RR_VERIFY_TYPE(sock_filter); struct sock_fprog { uint16_t len; char _padding[sizeof(ptr) - sizeof(uint16_t)]; ptr filter; }; RR_VERIFY_TYPE(sock_fprog); struct robust_list { ptr next; }; RR_VERIFY_TYPE(robust_list); struct 
robust_list_head { robust_list list; signed_long futex_offset; ptr list_op_pending; }; RR_VERIFY_TYPE(robust_list_head); struct snd_ctl_card_info { int card; int pad; unsigned char id[16]; unsigned char driver[16]; unsigned char name[32]; unsigned char longname[80]; unsigned char reserved_[16]; unsigned char mixername[80]; unsigned char components[128]; }; RR_VERIFY_TYPE(snd_ctl_card_info); }; struct X86Arch : public BaseArch { static const size_t elfmachine = EM_386; static const size_t elfendian = ELFDATA2LSB; static const MmapCallingSemantics mmap_semantics = StructArguments; static const CloneTLSType clone_tls_type = UserDescPointer; static const CloneParameterOrdering clone_parameter_ordering = FlagsStackParentTLSChild; static const SelectCallingSemantics select_semantics = SelectStructArguments; // The getgroups syscall (as well as several others) differs between // architectures depending on whether they ever supported 16-bit // {U,G}IDs or not. Architectures such as x86, which did support // 16-bit {U,G}IDs, have a getgroups syscall for the 16-bit GID case // and a getgroups32 syscall for the 32-bit GID case. Architectures // such as as x86-64, which support 32-bit GIDs exclusively, have only // a getgroups syscall. We need to know which one we're dealing with // when recording and replaying getgroups and related syscalls. typedef uint16_t legacy_uid_t; typedef uint16_t legacy_gid_t; #include "SyscallEnumsX86.generated" struct user_regs_struct { int32_t ebx; int32_t ecx; int32_t edx; int32_t esi; int32_t edi; int32_t ebp; int32_t eax; int32_t xds; int32_t xes; int32_t xfs; int32_t xgs; int32_t orig_eax; int32_t eip; int32_t xcs; int32_t eflags; int32_t esp; int32_t xss; }; RR_VERIFY_TYPE_ARCH(SupportedArch::x86, ::user_regs_struct, user_regs_struct); struct user_fpregs_struct { int32_t cwd; int32_t swd; int32_t twd; int32_t fip; int32_t fcs; int32_t foo; int32_t fos; int32_t st_space[20]; }; RR_VERIFY_TYPE_ARCH(SupportedArch::x86, ::user_fpregs_struct, user_fpregs_struct); struct user_fpxregs_struct { uint16_t cwd; uint16_t swd; uint16_t twd; uint16_t fop; int32_t fip; int32_t fcs; int32_t foo; int32_t fos; int32_t mxcsr; int32_t reserved; int32_t st_space[32]; int32_t xmm_space[32]; int32_t padding[56]; }; #if defined(__i386__) RR_VERIFY_TYPE_ARCH(SupportedArch::x86, ::user_fpxregs_struct, user_fpxregs_struct); #endif struct user { user_regs_struct regs; int u_fpvalid; user_fpregs_struct i387; uint32_t u_tsize; uint32_t u_dsize; uint32_t u_ssize; uint32_t start_code; uint32_t start_stack; int32_t signal; int reserved; ptr u_ar0; ptr u_fpstate; uint32_t magic; char u_comm[32]; int u_debugreg[8]; }; RR_VERIFY_TYPE_ARCH(SupportedArch::x86, ::user, user); struct stat { dev_t st_dev; unsigned_short __pad1; ino_t st_ino; mode_t st_mode; nlink_t st_nlink; uid_t st_uid; gid_t st_gid; dev_t st_rdev; unsigned_short __pad2; off_t st_size; blksize_t st_blksize; blkcnt_t st_blocks; timespec st_atim; timespec st_mtim; timespec st_ctim; unsigned_long __unused4; unsigned_long __unused5; }; RR_VERIFY_TYPE_ARCH(SupportedArch::x86, struct ::stat, struct stat); struct stat64 { dev_t st_dev; unsigned_int __pad1; ino_t __st_ino; mode_t st_mode; nlink_t st_nlink; uid_t st_uid; gid_t st_gid; dev_t st_rdev; unsigned_int __pad2; off64_t st_size; blksize_t st_blksize; blkcnt64_t st_blocks; timespec st_atim; timespec st_mtim; timespec st_ctim; ino64_t st_ino; }; RR_VERIFY_TYPE_ARCH(SupportedArch::x86, struct ::stat64, struct stat64); }; struct X64Arch : public BaseArch { static const size_t elfmachine = 
EM_X86_64; static const size_t elfendian = ELFDATA2LSB; static const MmapCallingSemantics mmap_semantics = RegisterArguments; static const CloneTLSType clone_tls_type = PthreadStructurePointer; static const CloneParameterOrdering clone_parameter_ordering = FlagsStackParentChildTLS; static const SelectCallingSemantics select_semantics = SelectRegisterArguments; typedef uint32_t legacy_uid_t; typedef uint32_t legacy_gid_t; #include "SyscallEnumsX64.generated" // The kernel defines the segment registers and eflags as 64-bit quantities, // even though the segment registers are really 16-bit and eflags is // architecturally defined as 32-bit. GDB wants the segment registers and // eflags to appear as 32-bit quantities. From the perspective of providing // registers to GDB, it's easier if we define these registers as uint32_t // with extra padding. struct user_regs_struct { uint64_t r15; uint64_t r14; uint64_t r13; uint64_t r12; uint64_t rbp; uint64_t rbx; uint64_t r11; uint64_t r10; uint64_t r9; uint64_t r8; uint64_t rax; uint64_t rcx; uint64_t rdx; uint64_t rsi; uint64_t rdi; // Unsigned type matches , but we need to treat this as // signed in practice. uint64_t orig_rax; uint64_t rip; uint32_t cs; uint32_t cs_upper; uint32_t eflags; uint32_t eflags_upper; uint64_t rsp; uint32_t ss; uint32_t ss_upper; // These _base registers are architecturally defined MSRs and really do // need to be 64-bit. uint64_t fs_base; uint64_t gs_base; uint32_t ds; uint32_t ds_upper; uint32_t es; uint32_t es_upper; uint32_t fs; uint32_t fs_upper; uint32_t gs; uint32_t gs_upper; }; RR_VERIFY_TYPE_ARCH(SupportedArch::x86_64, ::user_regs_struct, user_regs_struct); struct user_fpregs_struct { uint16_t cwd; uint16_t swd; uint16_t ftw; uint16_t fop; uint64_t rip; uint64_t rdp; uint32_t mxcsr; uint32_t mxcr_mask; uint32_t st_space[32]; uint32_t xmm_space[64]; uint32_t padding[24]; }; RR_VERIFY_TYPE_ARCH(SupportedArch::x86_64, ::user_fpregs_struct, user_fpregs_struct); struct user { struct user_regs_struct regs; int u_fpvalid; struct user_fpregs_struct i387; uint64_t u_tsize; uint64_t u_dsize; uint64_t u_ssize; uint64_t start_code; uint64_t start_stack; int64_t signal; int reserved; union { struct user_regs_struct* u_ar0; uint64_t __u_ar0_word; }; union { struct user_fpregs_struct* u_fpstate; uint64_t __u_fpstate_word; }; uint64_t magic; char u_comm[32]; uint64_t u_debugreg[8]; }; RR_VERIFY_TYPE_ARCH(SupportedArch::x86_64, ::user, user); struct stat { dev_t st_dev; ino_t st_ino; nlink_t st_nlink; mode_t st_mode; uid_t st_uid; gid_t st_gid; int __pad0; dev_t st_rdev; off_t st_size; blksize_t st_blksize; blkcnt_t st_blocks; struct timespec st_atim; struct timespec st_mtim; struct timespec st_ctim; syscall_slong_t __unused[3]; }; RR_VERIFY_TYPE_ARCH(SupportedArch::x86_64, struct ::stat, struct stat); struct stat64 { dev_t st_dev; ino_t st_ino; nlink_t st_nlink; mode_t st_mode; uid_t st_uid; gid_t st_gid; int __pad0; dev_t st_rdev; off_t st_size; blksize_t st_blksize; blkcnt_t st_blocks; struct timespec st_atim; struct timespec st_mtim; struct timespec st_ctim; syscall_slong_t __unused[3]; }; RR_VERIFY_TYPE_ARCH(SupportedArch::x86_64, struct ::stat64, struct stat64); }; #define RR_ARCH_FUNCTION(f, arch, args...) \ switch (arch) { \ default: \ assert(0 && "Unknown architecture"); \ case x86: \ return f(args); \ case x86_64: \ return f(args); \ } #include "SyscallHelperFunctions.generated" /** * Return true if |ptr| in task |t| points to an invoke-syscall instruction. 
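 * For example (informative addition): per the byte tables in
 * kernel_abi.cc, on x86 this matches cd 80 (int $0x80) and 0f 34
 * (sysenter), while on x86-64 it matches 0f 05 (syscall) and 0f 34.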
*/ bool is_at_syscall_instruction(Task* t, remote_code_ptr ptr); /** * Return the code bytes of an invoke-syscall instruction. The vector must * have the length given by |syscall_instruction_length|. */ std::vector syscall_instruction(SupportedArch arch); /** * Return the length of all invoke-syscall instructions. Currently, * they must all have the same length! */ ssize_t syscall_instruction_length(SupportedArch arch); #if defined(__i386__) typedef X86Arch NativeArch; #elif defined(__x86_64__) typedef X64Arch NativeArch; #else #error need to define new NativeArch #endif } // namespace rr #endif /* RR_KERNEL_ABI_H */ rr-4.1.0/src/kernel_metadata.cc000066400000000000000000000164101265436462100163700ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "kernel_metadata.h" #include #include #include #include #include #include "kernel_abi.h" #include "kernel_supplement.h" #include "log.h" using namespace rr; using namespace std; #include "SyscallnameArch.generated" string syscall_name(int syscall, SupportedArch arch) { RR_ARCH_FUNCTION(syscallname_arch, arch, syscall) } #define CASE(_id) \ case _id: \ return #_id; const char* ptrace_event_name(int event) { switch (event) { CASE(PTRACE_EVENT_FORK); CASE(PTRACE_EVENT_VFORK); CASE(PTRACE_EVENT_CLONE); CASE(PTRACE_EVENT_EXEC); CASE(PTRACE_EVENT_VFORK_DONE); CASE(PTRACE_EVENT_EXIT); /* XXX Ubuntu 12.04 defines a "PTRACE_EVENT_STOP", but that * has the same value as the newer EVENT_SECCOMP, so we'll * ignore STOP. */ CASE(PTRACE_EVENT_SECCOMP_OBSOLETE); CASE(PTRACE_EVENT_SECCOMP); CASE(PTRACE_EVENT_STOP); default: return "???PTRACE_EVENT"; } } const char* ptrace_req_name(int request) { switch (int(request)) { CASE(PTRACE_TRACEME); CASE(PTRACE_PEEKTEXT); CASE(PTRACE_PEEKDATA); CASE(PTRACE_PEEKUSER); CASE(PTRACE_POKETEXT); CASE(PTRACE_POKEDATA); CASE(PTRACE_POKEUSER); CASE(PTRACE_CONT); CASE(PTRACE_KILL); CASE(PTRACE_SINGLESTEP); CASE(PTRACE_GETREGS); CASE(PTRACE_SETREGS); CASE(PTRACE_GETFPREGS); CASE(PTRACE_SETFPREGS); CASE(PTRACE_ATTACH); CASE(PTRACE_DETACH); CASE(PTRACE_GETFPXREGS); CASE(PTRACE_SETFPXREGS); CASE(PTRACE_SYSCALL); CASE(PTRACE_SETOPTIONS); CASE(PTRACE_GETEVENTMSG); CASE(PTRACE_GETSIGINFO); CASE(PTRACE_SETSIGINFO); CASE(PTRACE_GETREGSET); CASE(PTRACE_SETREGSET); CASE(PTRACE_SEIZE); CASE(PTRACE_INTERRUPT); CASE(PTRACE_LISTEN); // These aren't part of the official ptrace-request enum. CASE(PTRACE_SYSEMU); CASE(PTRACE_SYSEMU_SINGLESTEP); default: return "???PTRACE_REQ"; } } const char* signal_name(int sig) { /* strsignal() would be nice to use here, but it provides TMI. 
*/ if (32 <= sig && sig <= 64) { static __thread char buf[] = "SIGRT00000000"; snprintf(buf, sizeof(buf) - 1, "SIGRT%d", sig); return buf; } switch (sig) { CASE(SIGHUP); CASE(SIGINT); CASE(SIGQUIT); CASE(SIGILL); CASE(SIGTRAP); CASE(SIGABRT); /*CASE(SIGIOT);*/ CASE(SIGBUS); CASE(SIGFPE); CASE(SIGKILL); CASE(SIGUSR1); CASE(SIGSEGV); CASE(SIGUSR2); CASE(SIGPIPE); CASE(SIGALRM); CASE(SIGTERM); CASE(SIGSTKFLT); /*CASE(SIGCLD);*/ CASE(SIGCHLD); CASE(SIGCONT); CASE(SIGSTOP); CASE(SIGTSTP); CASE(SIGTTIN); CASE(SIGTTOU); CASE(SIGURG); CASE(SIGXCPU); CASE(SIGXFSZ); CASE(SIGVTALRM); CASE(SIGPROF); CASE(SIGWINCH); /*CASE(SIGPOLL);*/ CASE(SIGIO); CASE(SIGPWR); CASE(SIGSYS); default: return "???signal"; } } bool is_sigreturn(int syscallno, SupportedArch arch) { return is_sigreturn_syscall(syscallno, arch) || is_rt_sigreturn_syscall(syscallno, arch); } string errno_name(int err) { switch (err) { case 0: return "SUCCESS"; CASE(EPERM); CASE(ENOENT); CASE(ESRCH); CASE(EINTR); CASE(EIO); CASE(ENXIO); CASE(E2BIG); CASE(ENOEXEC); CASE(EBADF); CASE(ECHILD); CASE(EAGAIN); CASE(ENOMEM); CASE(EACCES); CASE(EFAULT); CASE(ENOTBLK); CASE(EBUSY); CASE(EEXIST); CASE(EXDEV); CASE(ENODEV); CASE(ENOTDIR); CASE(EISDIR); CASE(EINVAL); CASE(ENFILE); CASE(EMFILE); CASE(ENOTTY); CASE(ETXTBSY); CASE(EFBIG); CASE(ENOSPC); CASE(ESPIPE); CASE(EROFS); CASE(EMLINK); CASE(EPIPE); CASE(EDOM); CASE(ERANGE); CASE(EDEADLK); CASE(ENAMETOOLONG); CASE(ENOLCK); CASE(ENOSYS); CASE(ENOTEMPTY); CASE(ELOOP); CASE(ENOMSG); CASE(EIDRM); CASE(ECHRNG); CASE(EL2NSYNC); CASE(EL3HLT); CASE(EL3RST); CASE(ELNRNG); CASE(EUNATCH); CASE(ENOCSI); CASE(EL2HLT); CASE(EBADE); CASE(EBADR); CASE(EXFULL); CASE(ENOANO); CASE(EBADRQC); CASE(EBADSLT); CASE(EBFONT); CASE(ENOSTR); CASE(ENODATA); CASE(ETIME); CASE(ENOSR); CASE(ENONET); CASE(ENOPKG); CASE(EREMOTE); CASE(ENOLINK); CASE(EADV); CASE(ESRMNT); CASE(ECOMM); CASE(EPROTO); CASE(EMULTIHOP); CASE(EDOTDOT); CASE(EBADMSG); CASE(EOVERFLOW); CASE(ENOTUNIQ); CASE(EBADFD); CASE(EREMCHG); CASE(ELIBACC); CASE(ELIBBAD); CASE(ELIBSCN); CASE(ELIBMAX); CASE(ELIBEXEC); CASE(EILSEQ); CASE(ERESTART); CASE(ESTRPIPE); CASE(EUSERS); CASE(ENOTSOCK); CASE(EDESTADDRREQ); CASE(EMSGSIZE); CASE(EPROTOTYPE); CASE(ENOPROTOOPT); CASE(EPROTONOSUPPORT); CASE(ESOCKTNOSUPPORT); CASE(EOPNOTSUPP); CASE(EPFNOSUPPORT); CASE(EAFNOSUPPORT); CASE(EADDRINUSE); CASE(EADDRNOTAVAIL); CASE(ENETDOWN); CASE(ENETUNREACH); CASE(ENETRESET); CASE(ECONNABORTED); CASE(ECONNRESET); CASE(ENOBUFS); CASE(EISCONN); CASE(ENOTCONN); CASE(ESHUTDOWN); CASE(ETOOMANYREFS); CASE(ETIMEDOUT); CASE(ECONNREFUSED); CASE(EHOSTDOWN); CASE(EHOSTUNREACH); CASE(EALREADY); CASE(EINPROGRESS); CASE(ESTALE); CASE(EUCLEAN); CASE(ENOTNAM); CASE(ENAVAIL); CASE(EISNAM); CASE(EREMOTEIO); CASE(EDQUOT); CASE(ENOMEDIUM); CASE(EMEDIUMTYPE); CASE(ECANCELED); CASE(ENOKEY); CASE(EKEYEXPIRED); CASE(EKEYREVOKED); CASE(EKEYREJECTED); CASE(EOWNERDEAD); CASE(ENOTRECOVERABLE); CASE(ERFKILL); CASE(EHWPOISON); default: { char buf[100]; sprintf(buf, "errno(%d)", err); return string(buf); } } } const char* sicode_name(int code, int sig) { switch (code) { CASE(SI_USER); CASE(SI_KERNEL); CASE(SI_QUEUE); CASE(SI_TIMER); CASE(SI_MESGQ); CASE(SI_ASYNCIO); CASE(SI_SIGIO); CASE(SI_TKILL); } switch (sig) { case SIGSEGV: switch (code) { CASE(SEGV_MAPERR); CASE(SEGV_ACCERR); } case SIGTRAP: switch (code) { CASE(TRAP_BRKPT); CASE(TRAP_TRACE); } } return "???sicode"; } std::ostream& operator<<(std::ostream& stream, const siginfo_t& siginfo) { stream << "{signo:" << signal_name(siginfo.si_signo) << ",errno:" << 
errno_name(siginfo.si_errno) << ",code:" << sicode_name(siginfo.si_code, siginfo.si_signo); switch (siginfo.si_signo) { case SIGILL: case SIGFPE: case SIGSEGV: case SIGBUS: case SIGTRAP: stream << ",addr:" << siginfo.si_addr; break; } stream << "}"; return stream; } int shm_flags_to_mmap_prot(int flags) { return PROT_READ | ((flags & SHM_RDONLY) ? 0 : PROT_WRITE) | ((flags & SHM_EXEC) ? PROT_EXEC : 0); } rr-4.1.0/src/kernel_metadata.h000066400000000000000000000023541265436462100162340ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_SYSCALLS_H_ #define RR_SYSCALLS_H_ #include #include #include #include "kernel_abi.h" /** * Return the symbolic name of |syscall|, f.e. "read", or "???syscall" * if unknown. */ std::string syscall_name(int syscall, SupportedArch arch); /** * Return the symbolic name of the PTRACE_EVENT_* |event|, or * "???EVENT" if unknown. */ const char* ptrace_event_name(int event); /** * Return the symbolic name of the PTRACE_ |request|, or "???REQ" if * unknown. */ const char* ptrace_req_name(int request); /** * Return the symbolic name of |sig|, f.e. "SIGILL", or "???signal" if * unknown. */ const char* signal_name(int sig); /** * Return true if this is some kind of sigreturn syscall. */ bool is_sigreturn(int syscall, SupportedArch arch); /** * Return the symbolic error name (e.g. "EINVAL") for errno. */ std::string errno_name(int err); /** * Return the symbolic name (e.g. "SI_USER") for an si_code. */ const char* sicode_name(int code, int sig); /** * Print siginfo on ostream. */ std::ostream& operator<<(std::ostream& stream, const siginfo_t& siginfo); int shm_flags_to_mmap_prot(int flags); #endif rr-4.1.0/src/kernel_supplement.h000066400000000000000000000040441265436462100166460ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_KERNEL_SUPPLEMENT_H_ #define RR_KERNEL_SUPPLEMENT_H_ #include #include #include /* Definitions that should be part of system headers (and maybe are on some but * not all systems). * This should not contain anything for which rr needs all the definitions * across architectures; those definitions belong in kernel_abi.h. */ #define PTRACE_EVENT_NONE 0 #define PTRACE_EVENT_STOP 128 #define PTRACE_SYSEMU 31 #define PTRACE_SYSEMU_SINGLESTEP 32 #ifndef PTRACE_O_TRACESECCOMP #define PTRACE_O_TRACESECCOMP 0x00000080 #define PTRACE_EVENT_SECCOMP_OBSOLETE 8 // ubuntu 12.04 #define PTRACE_EVENT_SECCOMP 7 // ubuntu 12.10 and future kernels #endif #ifndef PTRACE_O_EXITKILL #define PTRACE_O_EXITKILL (1 << 20) #endif #ifndef SECCOMP_SET_MODE_STRICT #define SECCOMP_SET_MODE_STRICT 0 #endif #ifndef SECCOMP_SET_MODE_FILTER #define SECCOMP_SET_MODE_FILTER 1 #endif #ifndef SYS_SECCOMP #define SYS_SECCOMP 1 #endif // These are defined by the include/linux/errno.h in the kernel tree. // Since userspace doesn't see these errnos in normal operation, that // header apparently isn't distributed with libc. #define ERESTARTSYS 512 #define ERESTARTNOINTR 513 #define ERESTARTNOHAND 514 #define ERESTART_RESTARTBLOCK 516 // These definitions haven't made it out to current libc-dev packages // yet. #ifndef GRND_NONBLOCK #define GRND_NONBLOCK 0x0001 #define GRND_RANDOM 0x0002 #endif /* We need to complement sigsets in order to update the Task blocked * set, but POSIX doesn't appear to define a convenient helper. So we * define our own linux-compatible sig_set_t and use bit operators to * manipulate sigsets. 
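 * A usage sketch (assuming the kernel's convention that signal N
 * occupies bit N-1 of the mask):
 *
 *   sig_set_t blocked = 0;
 *   blocked |= (sig_set_t)1 << (SIGUSR1 - 1);   // block SIGUSR1
 *   blocked &= ~((sig_set_t)1 << (SIGINT - 1)); // unblock SIGINT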
*/ typedef uint64_t sig_set_t; static_assert(_NSIG / 8 == sizeof(sig_set_t), "Update sig_set_t for _NSIG."); #ifndef MADV_DONTDUMP #define MADV_DONTDUMP 16 #endif #ifndef MADV_DODUMP #define MADV_DODUMP 17 #endif #ifndef MADV_SOFT_OFFLINE #define MADV_SOFT_OFFLINE 101 #endif #endif /* RR_KERNEL_SUPPLEMENT_H_ */ rr-4.1.0/src/log.cc000066400000000000000000000016461265436462100140360ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "log.h" #include "GdbServer.h" #include "RecordSession.h" static void emergency_debug(Task* t) { // Enable SIGINT in case it was disabled. Users want to be able to ctrl-C // out of this. struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_handler = SIG_DFL; sigaction(SIGINT, &sa, nullptr); RecordSession* record_session = t->session().as_record(); if (record_session) { record_session->trace_writer().close(); } if (probably_not_interactive() && !Flags::get().force_things) { errno = 0; FATAL() << "(session doesn't look interactive, aborting emergency debugging)"; } GdbServer::emergency_debug(t); FATAL() << "Can't resume execution from invalid state"; } EmergencyDebugOstream::~EmergencyDebugOstream() { log_stream() << std::endl; t->log_pending_events(); emergency_debug(t); } rr-4.1.0/src/log.h000066400000000000000000000116201265436462100136710ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_LOG_H #define RR_LOG_H #include #include "Flags.h" #include "task.h" enum LogLevel { LOG_fatal, LOG_error, LOG_warn, LOG_info, LOG_debug }; inline static bool logging_enabled_for(LogLevel level) { switch (level) { case LOG_fatal: case LOG_error: return true; case LOG_warn: case LOG_info: return Flags::get().verbose; case LOG_debug: // TODO make me dynamically-enable-able. #ifdef DEBUGTAG return true; #else return false; #endif default: return false; // not reached } } inline static const char* log_name(LogLevel level) { switch (level) { case LOG_fatal: return "FATAL"; case LOG_error: return "ERROR"; case LOG_warn: return "WARN"; case LOG_info: return "INFO"; default: return "???"; } } /** * Return the ostream to which log data will be written. * * Users can #define LOG_PATH to an arbitrary path, like * "/tmp/foo.log", to send data to that file instead of the default * stream (stderr). */ inline static std::ostream& log_stream() { #ifdef LOG_PATH static std::ofstream log(LOG_PATH); return log; #else return std::cerr; #endif } struct NewlineTerminatingOstream { NewlineTerminatingOstream(LogLevel level) : level(level) {} ~NewlineTerminatingOstream() { log_stream() << std::endl; if (Flags::get().fatal_errors_and_warnings && level <= LOG_warn) { abort(); } } operator std::ostream&() { return log_stream(); } LogLevel level; }; template <typename T> NewlineTerminatingOstream& operator<<(NewlineTerminatingOstream& stream, const T& v) { log_stream() << v; return stream; } // TODO: support stream modifiers.
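// A usage sketch of the stream types in this header (assumed typical call
// sites; the newline/abort behavior comes from the destructors defined here):
//   LOG(warn) << "short read: " << nread << " bytes";
//   FATAL() << "cannot open " << path;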
struct FatalOstream { ~FatalOstream() { log_stream() << std::endl; abort(); } }; template <typename T> FatalOstream& operator<<(FatalOstream& stream, const T& v) { log_stream() << v; return stream; } struct EmergencyDebugOstream { EmergencyDebugOstream(const Task* t) : t(const_cast<Task*>(t)) {} ~EmergencyDebugOstream(); Task* t; }; template <typename T> EmergencyDebugOstream& operator<<(EmergencyDebugOstream& stream, const T& v) { log_stream() << v; return stream; } template <typename T> inline static T& prepare_log_stream(T&& stream, LogLevel level, const char* file, int line, const char* function, const Task* t = nullptr, const char* pfx = nullptr) { int err = errno; #ifdef DEBUGTAG if (LOG_debug == level) { #ifdef LOG_STREAM return LOG_STREAM << "[" << DEBUGTAG << "] "; #else return stream << "[" << DEBUGTAG << "] "; #endif } #endif // DEBUGTAG stream << "[" << log_name(level) << " "; if (level <= LOG_error) { stream << file << ":" << line << ":"; } stream << function << "()"; if (level <= LOG_warn) { stream << " errno: " << err << " '" << strerror(err) << "'"; } stream << "] "; if (t) { stream << "\n (task " << t->tid << " (rec:" << t->rec_tid << ") at time " << t->trace_time() << ")"; } if (level <= LOG_error) { stream << "\n -> "; } if (pfx) { stream << pfx; } return stream; } /** * Write logging output at the given level, which can be one of |{ * error, warn, info, debug }| in decreasing order of severity. */ #define LOG(_level) \ if (logging_enabled_for(LOG_##_level)) \ prepare_log_stream(NewlineTerminatingOstream(LOG_##_level), LOG_##_level, \ __FILE__, __LINE__, __FUNCTION__) /** A fatal error has occurred. Log the error and exit. */ #define FATAL() \ prepare_log_stream(FatalOstream(), LOG_fatal, __FILE__, __LINE__, \ __FUNCTION__) /** * Assert a condition related to a Task. If the condition fails, an * emergency debugger for the task is launched. */ #define ASSERT(_t, _cond) \ if (!(_cond)) \ prepare_log_stream(EmergencyDebugOstream(_t), LOG_fatal, __FILE__, __LINE__, \ __FUNCTION__, (_t), \ " Assertion `" #_cond "' failed to hold. ") /** * Ensure that |_v| is streamed in hex format. * We make sure that signed types are *not* sign-extended. */ inline void* HEX(uint64_t v) { return reinterpret_cast<void*>(v); } inline void* HEX(int64_t v) { return reinterpret_cast<void*>(v); } inline void* HEX(uint32_t v) { return reinterpret_cast<void*>(v); } inline void* HEX(int32_t v) { return reinterpret_cast<void*>(uint32_t(v)); } #endif // RR_LOG_H rr-4.1.0/src/main.cc000066400000000000000000000206531265436462100142000ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "main.h" #include #include #include #include #include #include #include "Command.h" #include "Flags.h" #include "log.h" #include "RecordCommand.h" using namespace std; // Show version and quit.
static bool show_version = false; void assert_prerequisites(bool use_syscall_buffer) { struct utsname uname_buf; memset(&uname_buf, 0, sizeof(uname_buf)); if (!uname(&uname_buf)) { unsigned int major, minor; char dot; stringstream stream(uname_buf.release); stream >> major >> dot >> minor; if (KERNEL_VERSION(major, minor, 0) < KERNEL_VERSION(3, 4, 0)) { FATAL() << "Kernel doesn't support necessary ptrace " << "functionality; need 3.4.0 or better."; } if (use_syscall_buffer && KERNEL_VERSION(major, minor, 0) < KERNEL_VERSION(3, 5, 0)) { FATAL() << "Your kernel does not support syscall " << "filtering; please use the -n option"; } } } void check_performance_settings() { if (Flags::get().suppress_environment_warnings) { return; } // NB: we hard-code "cpu0" here because rr pins itself and all // tracees to cpu 0. We don't care about the other CPUs. ScopedFd fd("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor", O_RDONLY); if (0 > fd) { // If the file doesn't exist, the system probably // doesn't have the ability to frequency-scale, for // example a VM. LOG(info) << "Unable to check CPU-frequency governor."; return; } char governor[PATH_MAX]; ssize_t nread = read(fd, governor, sizeof(governor) - 1); if (0 > nread) { FATAL() << "Unable to read cpu0's frequency governor."; } governor[nread] = '\0'; ssize_t len = strlen(governor); if (len > 0) { // Eat the '\n'. governor[len - 1] = '\0'; } LOG(info) << "cpu0's frequency governor is '" << governor << "'"; if (strcmp("performance", governor)) { fprintf(stderr, "\n" "rr: Warning: Your CPU frequency governor is '%s'. rr strongly\n" " recommends that you use the 'performance' governor. Not " "using the\n" " 'performance' governor can cause rr to be at least 2x slower\n" " on laptops.\n" "\n" " On Fedora-based systems, you can enable the 'performance' " "governor\n" " by running the following commands:\n" "\n" " $ sudo dnf install kernel-tools\n" " $ sudo cpupower frequency-set -g performance\n" "\n", governor); // TODO: It would be nice to bail here or do something // clever to enable the 'performance' just for rr, but // that seems too hard at the moment. } } void print_version(FILE* out) { fprintf(out, "rr version %s\n", RR_VERSION); } void print_usage(FILE* out) { print_version(out); fputs("Usage:\n", out); Command::print_help_all(out); fputs( "\n" "Common options:\n" " -A, --microarch=<NAME> force rr to assume it's running on a CPU\n" " with microarch NAME even if runtime " "detection\n" " says otherwise.
NAME should be a string " "like\n" " 'Ivy Bridge'.\n" " -C, --checksum={on-syscalls,on-all-events}|FROM_TIME\n" " compute and store (during recording) or\n" " read and verify (during replay) checksums\n" " of each of a tracee's memory mappings " "either\n" " at the end of all syscalls " "(`on-syscalls'),\n" " at all events (`on-all-events'), or \n" " starting from a global timepoint " "FROM_TIME\n" " -D, --dump-on=<SYSCALL|SIGNAL>\n" " dump memory at SYSCALL or SIGNAL to the\n" " file " "`[trace_dir]/[tid].[time]_{rec,rep}':\n" " `_rec' for dumps during recording, `_rep'\n" " for dumps during replay\n" " -F, --force-things force rr to do some things that don't " "seem\n" " like good ideas, for example launching an\n" " interactive emergency debugger if stderr\n" " isn't a tty.\n" " -K, --check-cached-mmaps verify that cached task mmaps match " "/proc/maps\n" " -E, --fatal-errors any warning or error that is printed is\n" " treated as fatal\n" " -M, --mark-stdio mark stdio writes with [rr <PID> <EV>]\n" " where EV is the global trace time at\n" " which the write occurs and PID is the pid\n" " of the process it occurs in.\n" " -N, --version print the version number and exit\n" " -S, --suppress-environment-warnings\n" " suppress warnings about issues in the\n" " environment that rr has no control over\n" " -T, --dump-at=TIME dump memory at global timepoint TIME\n" " -V, --verbose log messages that may not be urgently \n" " critical to the user\n" " -W, --wait-secs=<NUM_SECS> wait NUM_SECS seconds just after startup,\n" " before initiating recording or replaying\n", out); } static void init_random() { // Not very good, but good enough for our non-security-sensitive needs. srandom(time(nullptr) ^ getpid()); } bool parse_global_option(std::vector<std::string>& args) { static const OptionSpec options[] = { { 'C', "checksum", HAS_PARAMETER }, { 'K', "check-cached-mmaps", NO_PARAMETER }, { 'U', "cpu-unbound", NO_PARAMETER }, { 'T', "dump-at", HAS_PARAMETER }, { 'D', "dump-on", HAS_PARAMETER }, { 'F', "force-things", NO_PARAMETER }, { 'A', "microarch", HAS_PARAMETER }, { 'M', "mark-stdio", NO_PARAMETER }, { 'S', "suppress-environment-warnings", NO_PARAMETER }, { 'E', "fatal-errors", NO_PARAMETER }, { 'V', "verbose", NO_PARAMETER }, { 'N', "version", NO_PARAMETER } }; ParsedOption opt; if (!Command::parse_option(args, options, &opt)) { return false; } Flags& flags = Flags::get_for_init(); switch (opt.short_name) { case 'A': flags.forced_uarch = opt.value; break; case 'C': if (opt.value == "on-syscalls") { LOG(info) << "checksumming on syscall exit"; flags.checksum = Flags::CHECKSUM_SYSCALL; } else if (opt.value == "on-all-events") { LOG(info) << "checksumming on all events"; flags.checksum = Flags::CHECKSUM_ALL; } else { flags.checksum = atoi(opt.value.c_str()); LOG(info) << "checksumming on at event " << flags.checksum; } break; case 'D': flags.dump_on = atoi(opt.value.c_str()); break; case 'E': flags.fatal_errors_and_warnings = true; break; case 'F': flags.force_things = true; break; case 'K': flags.check_cached_mmaps = true; break; case 'M': flags.mark_stdio = true; break; case 'S': flags.suppress_environment_warnings = true; break; case 'T': flags.dump_at = atoi(opt.value.c_str()); break; case 'V': flags.verbose = true; break; case 'N': show_version = true; break; default: assert(0 && "Invalid flag"); } return true; } int main(int argc, char* argv[]) { init_random(); vector<string> args; for (int i = 1; i < argc; ++i) { args.push_back(argv[i]); } while (parse_global_option(args)) { } if (show_version) { print_version(stdout); return 0; } if
(args.size() == 0) { print_usage(stderr); return 1; } auto command = Command::command_for_name(args[0]); if (command) { args.erase(args.begin()); } else { if (!Command::verify_not_option(args)) { print_usage(stderr); return 1; } command = RecordCommand::get(); } return command->run(args); } rr-4.1.0/src/main.h000066400000000000000000000005511265436462100140350ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_MAIN_H_ #define RR_MAIN_H_ #include #include void assert_prerequisites(bool use_syscall_buffer = false); void check_performance_settings(); void print_usage(FILE*); bool parse_global_option(std::vector<std::string>& args); #endif // RR_MAIN_H_ rr-4.1.0/src/preload/000077500000000000000000000000001265436462100143655ustar00rootroot00000000000000rr-4.1.0/src/preload/breakpoint_table.S000066400000000000000000000010221265436462100200110ustar00rootroot00000000000000#if defined(__i386__) || defined(__x86_64__) .text .global _breakpoint_table_entry_start .hidden _breakpoint_table_entry_start _breakpoint_table_entry_start: ret .global _breakpoint_table_entry_end .hidden _breakpoint_table_entry_end _breakpoint_table_entry_end: .rept 131071 /* SYSCALLBUF_BUFFER_SIZE/8 - 1 */ ret .endr #else #error unknown CPU architecture #endif /* __i386__/__x86_64__ */ .section .note.GNU-stack,"",@progbits .previous rr-4.1.0/src/preload/preload.c000066400000000000000000002155011265436462100161630ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "rrpreload" #define RR_IMPLEMENT_PRELOAD #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif #include "preload_interface.h" /** * Buffer syscalls, so that rr can process the entire buffer with one * trap instead of a trap per call. * * This file is compiled into a dso that's PRELOADed in recorded * applications. The dso replaces libc syscall wrappers with our own * implementation that saves nondeterministic outparams in a fixed-size * buffer. When the buffer is full or the recorded application * invokes an un-buffered syscall or receives a signal, we trap to rr * and it records the state of the buffer. * * During replay, rr simply refills the buffer with the recorded data * when it reaches the "flush-buffer" events that were recorded. Then * rr emulates each buffered syscall, and the code here restores the * client data from the refilled buffer. * * The crux of the implementation here is to selectively ptrace-trap * syscalls. The normal (un-buffered) syscalls generate a ptrace * trap, and the buffered syscalls trap directly to the kernel. This * is implemented with a seccomp-bpf which examines the syscall and * decides how to handle it (see seccomp-bpf.h and Task::spawn). * * Because this code runs in the tracee's address space and overrides * system calls, the code is rather delicate. The following rules * must be followed * * o No rr headers (other than seccomp-bpf.h and rr.h) may be included * o All syscalls invoked by this code must be called directly, not * through libc wrappers (which this file may itself indirectly override) */ /** * We also use this preload library to disable XShm by overriding * XShmQueryExtension. */ #include #include #include #include #include #include #include #include #include #include "rr/rr.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* NB: don't include any other local headers here.
*/ #ifdef memcpy #undef memcpy #endif #define memcpy you_must_use_local_memcpy #ifdef syscall #undef syscall #endif #define syscall you_must_use_traced_syscall /* x86 is the only architecture whose syscalls all come through a pinch point that we can monkeypatch. There are ways to handle other architectures, but for now, we can't filter on any architecture but x86. */ #if defined(__i386__) #define RR_SYSCALL_FILTERING 1 #else #define RR_SYSCALL_FILTERING 0 #endif #define RR_HIDDEN __attribute__((visibility("hidden"))) /** * Represents syscall params. Makes it simpler to pass them around, * and avoids pushing/popping all the data for calls. */ struct syscall_info { long no; long args[6]; }; /* Nonzero when syscall buffering is enabled. */ static int buffer_enabled; /* Nonzero after process-global state has been initialized. */ static int process_inited; /* 0 during recording, 1 during replay. * This MUST NOT be used in conditional branches. It should only be used * as the condition for conditional moves so that control flow during replay * does not diverge from control flow during recording. * We also have to be careful that values different between record and replay * don't accidentally leak into other memory locations or registers. * USE WITH CAUTION. */ static unsigned char in_replay; /** * If syscallbuf_fds_disabled[fd] is nonzero, then operations on that fd * must be performed through traced syscalls, not the syscallbuf. * The rr supervisor modifies this array directly to dynamically turn * syscallbuf on and off for particular fds. fds outside the array range must * never use the syscallbuf. */ static volatile char syscallbuf_fds_disabled[SYSCALLBUF_FDS_DISABLED_SIZE]; /** * Because this library is always loaded via LD_PRELOAD, we can use the * initial-exec TLS model (see http://www.akkadia.org/drepper/tls.pdf) which * lets the compiler generate better code which, crucially, does not call * helper functions outside of our library. */ #define TLS_STORAGE_MODEL __attribute__((tls_model("initial-exec"))) /* Nonzero when thread-local state like the syscallbuf has been * initialized. */ static __thread int thread_inited TLS_STORAGE_MODEL; /* When buffering is enabled, points at the thread's mapped buffer * segment. At the start of the segment is an object of type |struct * syscallbuf_hdr|, so |buffer| is also a pointer to the buffer * header. */ static __thread uint8_t* buffer TLS_STORAGE_MODEL; /* This is used to support the buffering of "may-block" system calls. * The problem that needs to be addressed can be introduced with a * simple example; assume that we're buffering the "read" and "write" * syscalls. * * o (Tasks W and R set up a synchronous-IO pipe open between them; W * "owns" the write end of the pipe; R owns the read end; the pipe * buffer is full) * o Task W invokes the write syscall on the pipe * o Since write is a buffered syscall, the seccomp filter traps W * directly to the kernel; there's no trace event for W delivered * to rr. * o The pipe is full, so W is descheduled by the kernel because W * can't make progress. * o rr thinks W is still running and doesn't schedule R. * * At this point, progress in the recorded application can only be * made by scheduling R, but no one tells rr to do that. Oops! * * Thus enter the "desched counter". It's a perf_event for the "sw context * switches" event (which, more precisely, is "sw deschedule"; it * counts schedule-out, not schedule-in). We program the counter to * deliver a signal to this task when there's new counter data * available.
And we set up the "sample period", how many descheds * are triggered before the signal is delivered, to be "1". This * means that when the counter is armed, the next desched (i.e., the * next time the desched counter is bumped up) of this task will * deliver the signal to it. And signal delivery always generates a * ptrace trap, so rr can deduce that this task was descheduled and * schedule another. * * The description above is sort of an idealized view; there are * numerous implementation details that are documented in * handle_signal.c, where they're dealt with. */ static __thread int desched_counter_fd TLS_STORAGE_MODEL; /* Points at the libc/pthread pthread_create(). We wrap * pthread_create, so need to retain this pointer to call out to the * libc version. There is no __pthread_create stub to call. There are * some explicitly-versioned stubs but let's not use those. */ static int (*real_pthread_create)(pthread_t* thread, const pthread_attr_t* attr, void* (*start_routine)(void*), void* arg); static int (*real_pthread_mutex_timedlock)(pthread_mutex_t* mutex, const struct timespec* abstime); /** * Return a pointer to the buffer header, which happens to occupy the * initial bytes in the mapped region. */ static struct syscallbuf_hdr* buffer_hdr(void) { return (struct syscallbuf_hdr*)buffer; } /** * Return a pointer to the byte just after the last valid syscall record in * the buffer. */ static uint8_t* buffer_last(void) { return (uint8_t*)next_record(buffer_hdr()); } /** * Return a pointer to the byte just after the very end of the mapped * region. */ static uint8_t* buffer_end(void) { return buffer + SYSCALLBUF_BUFFER_SIZE; } /** * Same as libc memcpy(), but usable within syscallbuf transaction * critical sections. */ static void local_memcpy(void* dest, const void* source, int n) { #if defined(__i386__) || defined(__x86_64__) /* On modern x86-ish CPUs rep movsb is fast, usually able to move * 64 bytes at a time. */ __asm__ __volatile__("rep movsb\n\t" : "+S"(source), "+D"(dest), "+c"(n) : : "cc", "memory"); #else #error Unknown architecture #endif } /* The following are wrappers for the syscalls invoked by this library * itself. These syscalls will generate ptrace traps. * stack_param_1 and stack_param_2 are pushed onto the stack just before * the syscall, for SYS_rrcall_notify_syscall_hook_exit which takes stack * parameters as well as register parameters. * syscall_instruction is the actual syscall invocation instruction * (a function which we call with the registers set up appropriately). */ extern RR_HIDDEN long _raw_syscall(int syscallno, long a0, long a1, long a2, long a3, long a4, long a5, void* syscall_instruction, long stack_param_1, long stack_param_2); static int update_errno_ret(long ret) { /* EHWPOISON is the last known errno as of linux 3.9.5. 
*/ if (0 > ret && ret >= -EHWPOISON) { errno = -ret; ret = -1; } return ret; } static void* traced_syscall_instruction = (void*)(RR_PAGE_IN_TRACED_SYSCALL_ADDR - RR_PAGE_SYSCALL_INSTRUCTION_END); static void* untraced_syscall_instruction = (void*)(RR_PAGE_IN_UNTRACED_SYSCALL_ADDR - RR_PAGE_SYSCALL_INSTRUCTION_END); static void* untraced_replayed_syscall_instruction = (void*)(RR_PAGE_IN_UNTRACED_REPLAYED_SYSCALL_ADDR - RR_PAGE_SYSCALL_INSTRUCTION_END); static void* privileged_traced_syscall_instruction = (void*)(RR_PAGE_IN_PRIVILEGED_TRACED_SYSCALL_ADDR - RR_PAGE_SYSCALL_INSTRUCTION_END); static void* privileged_untraced_syscall_instruction = (void*)(RR_PAGE_IN_PRIVILEGED_UNTRACED_SYSCALL_ADDR - RR_PAGE_SYSCALL_INSTRUCTION_END); static int privileged_traced_syscall(int syscallno, long a0, long a1, long a2, long a3, long a4, long a5) { long ret = _raw_syscall(syscallno, a0, a1, a2, a3, a4, a5, privileged_traced_syscall_instruction, 0, 0); return update_errno_ret(ret); } #define privileged_traced_syscall6(no, a0, a1, a2, a3, a4, a5) \ privileged_traced_syscall(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, \ (uintptr_t)a3, (uintptr_t)a4, (uintptr_t)a5) #define privileged_traced_syscall5(no, a0, a1, a2, a3, a4) \ privileged_traced_syscall6(no, a0, a1, a2, a3, a4, 0) #define privileged_traced_syscall4(no, a0, a1, a2, a3) \ privileged_traced_syscall5(no, a0, a1, a2, a3, 0) #define privileged_traced_syscall3(no, a0, a1, a2) \ privileged_traced_syscall4(no, a0, a1, a2, 0) #define privileged_traced_syscall2(no, a0, a1) \ privileged_traced_syscall3(no, a0, a1, 0) #define privileged_traced_syscall1(no, a0) privileged_traced_syscall2(no, a0, 0) #define privileged_traced_syscall0(no) privileged_traced_syscall1(no, 0) /** * Make a raw traced syscall using the params in |call|. "Raw" traced * syscalls return the raw kernel return value, and don't transform it * to -1/errno per POSIX semantics. */ static long traced_raw_syscall(const struct syscall_info* call) { /* FIXME: pass |call| to avoid pushing these on the stack * again. */ return _raw_syscall(call->no, call->args[0], call->args[1], call->args[2], call->args[3], call->args[4], call->args[5], traced_syscall_instruction, 0, 0); } #if defined(SYS_fcntl64) #define RR_FCNTL_SYSCALL SYS_fcntl64 #else #define RR_FCNTL_SYSCALL SYS_fcntl #endif static int privileged_traced_fcntl(int fd, int cmd, ...) { va_list ap; void* arg; va_start(ap, cmd); arg = va_arg(ap, void*); va_end(ap); return privileged_traced_syscall3(RR_FCNTL_SYSCALL, fd, cmd, arg); } static pid_t privileged_traced_getpid(void) { return privileged_traced_syscall0(SYS_getpid); } static pid_t privileged_traced_gettid(void) { return privileged_traced_syscall0(SYS_gettid); } static int privileged_traced_perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu, int group_fd, unsigned long flags) { return privileged_traced_syscall5(SYS_perf_event_open, attr, pid, cpu, group_fd, flags); } static int privileged_traced_raise(int sig) { return privileged_traced_syscall2(SYS_kill, privileged_traced_getpid(), sig); } static ssize_t privileged_traced_write(int fd, const void* buf, size_t count) { return privileged_traced_syscall3(SYS_write, fd, buf, count); } /* We can't use the rr logging helpers because they rely on libc * syscall-invoking functions, so roll our own here. * * XXX just use these for all logging? */ __attribute__((format(printf, 1, 2))) static void logmsg(const char* msg, ...) 
{ va_list args; char buf[1024]; int len; va_start(args, msg); len = vsnprintf(buf, sizeof(buf) - 1, msg, args); va_end(args); privileged_traced_write(STDERR_FILENO, buf, len); } #ifndef NDEBUG #define assert(cond) \ do { \ if (!(cond)) { \ logmsg("%s:%d: Assertion `" #cond "' failed.\n", __FILE__, __LINE__); \ privileged_traced_raise(SIGABRT); \ } \ } while (0) #else #define assert(cond) ((void)0) #endif #define fatal(msg, ...) \ do { \ logmsg("[FATAL] (%s:%d: errno: %s: tid: %d) " msg "\n", __FILE__, \ __LINE__, strerror(errno), privileged_traced_gettid(), \ ##__VA_ARGS__); \ privileged_traced_syscall1(SYS_exit_group, EX_OSERR); \ } while (0) #ifdef DEBUGTAG #define debug(msg, ...) logmsg("[" DEBUGTAG "] " msg "\n", ##__VA_ARGS__) #else #define debug(msg, ...) ((void)0) #endif /** * Unlike |traced_syscall()|, this helper is implicitly "raw" (returns * the direct kernel return value), because the syscall hooks have to * save that raw return value. * This is only called from syscall wrappers that are doing a proper * buffered syscall. */ static long untraced_syscall_base(int syscallno, long a0, long a1, long a2, long a3, long a4, long a5, void* syscall_instruction) { struct syscallbuf_record* rec = (struct syscallbuf_record*)buffer_last(); long ret = _raw_syscall(syscallno, a0, a1, a2, a3, a4, a5, syscall_instruction, 0, 0); unsigned char tmp_in_replay = in_replay; /* During replay, return the result that's already in the buffer, instead of what our "syscall" returned. */ #if defined(__i386__) || defined(__x86_64__) /* On entry, during recording %eax/%rax are whatever the kernel returned * but during replay they may be invalid (e.g. 0). During replay, reload * %eax/%rax from |rec->ret|. At the end of this sequence all registers * will match between recording and replay. We clobber the temporary * in_replay register, and the condition codes, to ensure this. * This all assumes the compiler doesn't create unnecessary temporaries * holding values like |ret|. Inspection of generated code shows it doesn't. 
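 * A C-like sketch of the instruction sequence below (illustrative only,
 * not compiled):
 *
 *   if (tmp_in_replay != 0)  // test %1,%1; cmovne %2,%0
 *     ret = rec->ret;        // conditional move, so no branch is taken
 *   tmp_in_replay = 0;       // xor %1,%1 scrubs the temporary and flags
 *
 * Using cmov instead of an if() keeps control flow identical between
 * recording and replay even though the data values differ.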
*/ __asm__("test %1,%1\n\t" "cmovne %2,%0\n\t" "xor %1,%1\n\t" : "+a"(ret), "+c"(tmp_in_replay) : "m"(rec->ret) : "cc"); #else #error Unknown architecture #endif return ret; } #define untraced_syscall6(no, a0, a1, a2, a3, a4, a5) \ untraced_syscall_base(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, \ (uintptr_t)a3, (uintptr_t)a4, (uintptr_t)a5, \ untraced_syscall_instruction) #define untraced_syscall5(no, a0, a1, a2, a3, a4) \ untraced_syscall6(no, a0, a1, a2, a3, a4, 0) #define untraced_syscall4(no, a0, a1, a2, a3) \ untraced_syscall5(no, a0, a1, a2, a3, 0) #define untraced_syscall3(no, a0, a1, a2) untraced_syscall4(no, a0, a1, a2, 0) #define untraced_syscall2(no, a0, a1) untraced_syscall3(no, a0, a1, 0) #define untraced_syscall1(no, a0) untraced_syscall2(no, a0, 0) #define untraced_syscall0(no) untraced_syscall1(no, 0) #define untraced_replayed_syscall6(no, a0, a1, a2, a3, a4, a5) \ untraced_syscall_base(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, \ (uintptr_t)a3, (uintptr_t)a4, (uintptr_t)a5, \ untraced_replayed_syscall_instruction) #define untraced_replayed_syscall5(no, a0, a1, a2, a3, a4) \ untraced_replayed_syscall6(no, a0, a1, a2, a3, a4, 0) #define untraced_replayed_syscall4(no, a0, a1, a2, a3) \ untraced_replayed_syscall5(no, a0, a1, a2, a3, 0) #define untraced_replayed_syscall3(no, a0, a1, a2) \ untraced_replayed_syscall4(no, a0, a1, a2, 0) #define untraced_replayed_syscall2(no, a0, a1) \ untraced_replayed_syscall3(no, a0, a1, 0) #define untraced_replayed_syscall1(no, a0) untraced_replayed_syscall2(no, a0, 0) #define untraced_replayed_syscall0(no) untraced_replayed_syscall1(no, 0) #define privileged_untraced_syscall6(no, a0, a1, a2, a3, a4, a5) \ _raw_syscall(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, (uintptr_t)a3, \ (uintptr_t)a4, (uintptr_t)a5, \ privileged_untraced_syscall_instruction, 0, 0) #define privileged_untraced_syscall5(no, a0, a1, a2, a3, a4) \ privileged_untraced_syscall6(no, a0, a1, a2, a3, a4, 0) #define privileged_untraced_syscall4(no, a0, a1, a2, a3) \ privileged_untraced_syscall5(no, a0, a1, a2, a3, 0) #define privileged_untraced_syscall3(no, a0, a1, a2) \ privileged_untraced_syscall4(no, a0, a1, a2, 0) #define privileged_untraced_syscall2(no, a0, a1) \ privileged_untraced_syscall3(no, a0, a1, 0) #define privileged_untraced_syscall1(no, a0) \ privileged_untraced_syscall2(no, a0, 0) #define privileged_untraced_syscall0(no) privileged_untraced_syscall1(no, 0) static int privileged_untraced_close(int fd) { return privileged_untraced_syscall1(SYS_close, fd); } static int privileged_untraced_fcntl(int fd, int cmd, ...) { va_list ap; void* arg; va_start(ap, cmd); arg = va_arg(ap, void*); va_end(ap); return privileged_untraced_syscall3(RR_FCNTL_SYSCALL, fd, cmd, arg); } #if RR_SYSCALL_FILTERING extern RR_HIDDEN void _syscall_hook_trampoline(void); #else static void _syscall_hook_trampoline(void) {} #endif /** * Do what's necessary to set up buffers for the caller. * |untraced_syscall_ip| lets rr know where our untraced syscalls will * originate from. |addr| is the address of the control socket the * child expects to connect to. |msg| is a pre-prepared IPC that can * be used to share fds; |fdptr| is a pointer to the control-message * data buffer where the fd number being shared will be stored. * |args_vec| provides the tracer with preallocated space to make * socketcall syscalls. * * Return a pointer to the syscallbuf (with an initialized header * including the available size), if syscallbuf is enabled. * * This is a "magic" syscall implemented by rr. 
*/ static void rrcall_init_buffers(struct rrcall_init_buffers_params* args) { privileged_traced_syscall1(SYS_rrcall_init_buffers, args); } /** * Return a counter that generates a signal targeted at this task * every time the task is descheduled |nr_descheds| times. */ static int open_desched_event_counter(size_t nr_descheds, pid_t tid) { struct perf_event_attr attr; int tmp_fd, fd; struct f_owner_ex own; memset(&attr, 0, sizeof(attr)); attr.size = sizeof(attr); attr.type = PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_CONTEXT_SWITCHES; attr.disabled = 1; attr.sample_period = nr_descheds; tmp_fd = privileged_traced_perf_event_open(&attr, 0 /*self*/, -1 /*any cpu*/, -1, 0); if (0 > tmp_fd) { fatal("Failed to perf_event_open(cs, period=%zu)", nr_descheds); } fd = privileged_traced_fcntl(tmp_fd, F_DUPFD_CLOEXEC, RR_DESCHED_EVENT_FLOOR_FD); if (0 > fd) { fatal("Failed to dup desched fd"); } if (privileged_untraced_close(tmp_fd)) { fatal("Failed to close tmp_fd"); } if (privileged_untraced_fcntl(fd, F_SETFL, O_ASYNC)) { fatal("Failed to fcntl(O_ASYNC) the desched counter"); } own.type = F_OWNER_TID; own.pid = tid; if (privileged_untraced_fcntl(fd, F_SETOWN_EX, &own)) { fatal("Failed to fcntl(SETOWN_EX) the desched counter to this"); } if (privileged_untraced_fcntl(fd, F_SETSIG, SYSCALLBUF_DESCHED_SIGNAL)) { fatal("Failed to fcntl(SETSIG, %d) the desched counter", SYSCALLBUF_DESCHED_SIGNAL); } return fd; } /** * Initialize thread-local buffering state, if enabled. */ static void init_thread(void) { struct rrcall_init_buffers_params args; assert(process_inited); assert(!thread_inited); if (!buffer_enabled) { thread_inited = 1; return; } /* NB: we want this setup emulated during replay. */ desched_counter_fd = open_desched_event_counter(1, privileged_traced_gettid()); args.desched_counter_fd = desched_counter_fd; /* Trap to rr: let the magic begin! * * If the desched signal is currently blocked, then the tracer * will clear our TCB guard and we won't be able to buffer * syscalls. But the tracee will set the guard when (or if) * the signal is unblocked. */ rrcall_init_buffers(&args); /* rr initializes the buffer header. */ buffer = args.syscallbuf_ptr; thread_inited = 1; } /** * After a fork(), we retain a CoW mapping of our parent's syscallbuf. * That's bad, because we don't want to use that buffer. So drop the * parent's copy and reinstall our own. * * FIXME: this "leaks" the parent's old copy in our address space. */ static void post_fork_child(void) { buffer = NULL; thread_inited = 0; init_thread(); } extern char _breakpoint_table_entry_start; extern char _breakpoint_table_entry_end; /** * Initialize process-global buffering state, if enabled. */ static void __attribute__((constructor)) init_process(void) { struct rrcall_init_preload_params params; extern RR_HIDDEN void _stub_buffer(void); extern RR_HIDDEN void _stub_buffer_end(void); #if defined(__i386__) extern RR_HIDDEN void _syscall_hook_trampoline_3d_01_f0_ff_ff(void); extern RR_HIDDEN void _syscall_hook_trampoline_90_90_90(void); struct syscall_patch_hook syscall_patch_hooks[] = { /* pthread_cond_broadcast has 'int 80' followed by * cmp $-4095,%eax (in glibc-2.18-16.fc20.i686) */ { 5, { 0x3d, 0x01, 0xf0, 0xff, 0xff }, (uintptr_t)_syscall_hook_trampoline_3d_01_f0_ff_ff }, /* Our vdso syscall patch has 'int 80' followed by nop; nop; nop */ { 3, { 0x90, 0x90, 0x90 }, (uintptr_t)_syscall_hook_trampoline_90_90_90 } }; /* Load GLIBC 2.1 version of pthread_create.
Otherwise we may get the 2.0 version, which cannot handle the pthread_attr values passed by callers expecting to call the glibc 2.1 version. */ real_pthread_create = dlvsym(RTLD_NEXT, "pthread_create", "GLIBC_2.1"); #elif defined(__x86_64__) extern RR_HIDDEN void _syscall_hook_trampoline_48_3d_01_f0_ff_ff(void); extern RR_HIDDEN void _syscall_hook_trampoline_48_3d_00_f0_ff_ff(void); extern RR_HIDDEN void _syscall_hook_trampoline_48_8b_3c_24(void); extern RR_HIDDEN void _syscall_hook_trampoline_5a_5e_c3(void); extern RR_HIDDEN void _syscall_hook_trampoline_90_90_90(void); struct syscall_patch_hook syscall_patch_hooks[] = { /* Many glibc syscall wrappers (e.g. read) have 'syscall' followed by * cmp $-4095,%rax (in glibc-2.18-16.fc20.x86_64) */ { 6, { 0x48, 0x3d, 0x01, 0xf0, 0xff, 0xff }, (uintptr_t)_syscall_hook_trampoline_48_3d_01_f0_ff_ff }, /* Many glibc syscall wrappers (e.g. __libc_recv) have 'syscall' followed by * cmp $-4096,%rax (in glibc-2.18-16.fc20.x86_64) */ { 6, { 0x48, 0x3d, 0x00, 0xf0, 0xff, 0xff }, (uintptr_t)_syscall_hook_trampoline_48_3d_00_f0_ff_ff }, /* Many glibc syscall wrappers (e.g. read) have 'syscall' followed by * mov (%rsp),%rdi (in glibc-2.18-16.fc20.x86_64) */ { 4, { 0x48, 0x8b, 0x3c, 0x24 }, (uintptr_t)_syscall_hook_trampoline_48_8b_3c_24 }, /* __lll_unlock_wake has 'syscall' followed by * pop %rdx; pop %rsi; ret */ { 3, { 0x5a, 0x5e, 0xc3 }, (uintptr_t)_syscall_hook_trampoline_5a_5e_c3 }, /* Our VDSO vsyscall patches have 'syscall' followed by "nop; nop; nop" */ { 3, { 0x90, 0x90, 0x90 }, (uintptr_t)_syscall_hook_trampoline_90_90_90 } }; real_pthread_create = dlsym(RTLD_NEXT, "pthread_create"); #else #error Unknown architecture #endif if (process_inited) { return; } buffer_enabled = !!getenv(SYSCALLBUF_ENABLED_ENV_VAR); pthread_atfork(NULL, NULL, post_fork_child); params.syscallbuf_enabled = buffer_enabled; params.syscallbuf_fds_disabled = buffer_enabled ? syscallbuf_fds_disabled : NULL; params.syscall_hook_trampoline = (void*)_syscall_hook_trampoline; params.syscall_hook_stub_buffer = (void*)_stub_buffer; params.syscall_hook_stub_buffer_end = (void*)_stub_buffer_end; params.syscall_patch_hook_count = sizeof(syscall_patch_hooks) / sizeof(syscall_patch_hooks[0]); params.syscall_patch_hooks = syscall_patch_hooks; params.in_replay_flag = &in_replay; params.breakpoint_table = &_breakpoint_table_entry_start; params.breakpoint_table_entry_size = &_breakpoint_table_entry_end - &_breakpoint_table_entry_start; privileged_traced_syscall1(SYS_rrcall_init_preload, ¶ms); process_inited = 1; init_thread(); } /** * In a thread newly created by |pthread_create()|, first initialize * thread-local internal rr data, then trampoline into the user's * thread function. */ struct thread_func_data { void* (*start_routine)(void*); void* arg; }; static void* thread_trampoline(void* arg) { struct thread_func_data data = *(struct thread_func_data*)arg; free(arg); init_thread(); return data.start_routine(data.arg); } /** * Interpose |pthread_create()| so that we can use a custom trampoline * function (see above) that initializes rr thread-local data for new * threads. * * This is a wrapper of |pthread_create()|, but not like the ones * below: we don't wrap |pthread_create()| in order to buffer its * syscalls, rather in order to initialize rr thread data. 
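 * An outline of the wrapper below: heap-allocate a thread_func_data
 * holding the user's start_routine and arg, NULL out our TLS |buffer|
 * so the new thread can't inherit this thread's pointer, call the real
 * pthread_create() with thread_trampoline() as the entry point (which
 * runs init_thread() before the user's function), then restore |buffer|
 * in the parent.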
*/ int pthread_create(pthread_t* thread, const pthread_attr_t* attr, void* (*start_routine)(void*), void* arg) { struct thread_func_data* data = malloc(sizeof(*data)); void* saved_buffer = buffer; int ret; /* Init syscallbuf now if we haven't yet (e.g. if pthread_create is called * during library initialization before our preload library). * This also fetches real_pthread_create which we'll need below. */ init_process(); data->start_routine = start_routine; data->arg = arg; /* Don't let the new thread use our TLS pointer. */ buffer = NULL; ret = real_pthread_create(thread, attr, thread_trampoline, data); buffer = saved_buffer; return ret; } #define PTHREAD_MUTEX_TYPE_MASK 3 #define PTHREAD_MUTEX_PRIO_INHERIT_NP 32 static void fix_mutex_kind(pthread_mutex_t* mutex) { /* Disable priority inheritance. */ mutex->__data.__kind &= ~PTHREAD_MUTEX_PRIO_INHERIT_NP; } /* * We bind directly to __pthread_mutex_lock and __pthread_mutex_trylock * because setting up indirect function pointers in init_process requires * calls to dlsym which itself can call pthread_mutex_lock (e.g. via * application code overriding malloc/calloc to use a pthreads-based * implementation). */ extern int __pthread_mutex_lock(pthread_mutex_t* mutex); extern int __pthread_mutex_trylock(pthread_mutex_t* mutex); /* Prevent use of lock elision; Haswell's TSX/RTM features used by lock elision increment the rbc perf counter for instructions which are later rolled back if the transaction fails. */ int pthread_mutex_lock(pthread_mutex_t* mutex) { fix_mutex_kind(mutex); return __pthread_mutex_lock(mutex); } int pthread_mutex_timedlock(pthread_mutex_t* mutex, const struct timespec* abstime) { fix_mutex_kind(mutex); /* No __pthread_mutex_timedlock stub exists, so we have to use the * indirect call. */ if (!real_pthread_mutex_timedlock) { real_pthread_mutex_timedlock = dlsym(RTLD_NEXT, "pthread_mutex_timedlock"); } return real_pthread_mutex_timedlock(mutex, abstime); } int pthread_mutex_trylock(pthread_mutex_t* mutex) { fix_mutex_kind(mutex); return __pthread_mutex_trylock(mutex); } /** * syscall hooks start here. * * !!! NBB !!!: from here on, all code that executes within the * critical sections of transactions *MUST KEEP $ip IN THE SYSCALLBUF * CODE*. That means no calls into libc, even for innocent-looking * functions like |memcpy()|. * * How syscall hooks operate: * * 1. The rr tracer monkey-patches __kernel_vsyscall() to jump to * _syscall_hook_trampoline() above. * 2. When a call is made to __kernel_vsyscall(), it jumps to * _syscall_hook_trampoline(), where the syscall params are * packaged up into a call to syscall_hook() below. * 3. syscall_hook() dispatches to a syscall processor function. * 4. The syscall processor prepares a new record in the buffer. See * struct syscallbuf_record for record fields. If the buffer runs * out of space, the processor function aborts and makes a traced * syscall, trapping to rr. rr then flushes the buffer. Records * are directly saved to trace, and a buffer-flush event is * recorded without execution info because it's a synthetic event. * 5. Then, the syscall processor redirects all potential output * for the syscall to the record (and corrects the overall size of * the record while it does so). * 6. The syscall is invoked through an asm helper that does *not* * ptrace-trap to rr. * 7. The syscall output, written on the buffer, is copied to the * original pointers provided by the user.
Take notice that this * part saves us the injection of the data on replay, as we only * need to push the data to the buffer and the wrapper code will * copy it to the user address for us. * 8. The return value and overall size are saved to the record. */ /** * Call this and save the result at the start of every system call we * want to buffer. The result is a pointer into the record space. You * can add to this pointer to allocate space in the trace record. * However, do not read or write through this pointer until * start_commit_syscall() has been called. And you *must* call * start_commit_syscall() after this is called, otherwise buffering * state will be inconsistent between syscalls. * * See |sys_clock_gettime()| for a simple example of how this helper * should be used to buffer outparam data. */ static void* prep_syscall(void) { if (!buffer) { return NULL; } if (buffer_hdr()->locked) { /* We may be reentering via a signal handler. Return * an invalid pointer. */ return NULL; } /* We don't need to worry about a race between testing * |locked| and setting it here. rr recording is responsible * for ensuring signals are not delivered during * syscall_buffer prologue and epilogue code. * * XXX except for synchronous signals generated in the syscall * buffer code, while reading/writing user pointers */ buffer_hdr()->locked = 1; /* "Allocate" space for a new syscall record, not including * syscall outparam data. */ return buffer_last() + sizeof(struct syscallbuf_record); } /** * Like prep_syscall, but preps a syscall to operate on a particular fd. If * syscallbuf is disabled for this fd, returns NULL (in which case * start_commit_syscall will abort cleanly and a traced syscall will be used). */ static void* prep_syscall_for_fd(int fd) { if (fd < 0 || fd >= SYSCALLBUF_FDS_DISABLED_SIZE || syscallbuf_fds_disabled[fd]) { return NULL; } return prep_syscall(); } static void arm_desched_event(void) { /* Don't trace the ioctl; doing so would trigger a flushing * ptrace trap, which is exactly what this code is trying to * avoid! :) Although we don't allocate extra space for these * ioctl's, we do record that we called them; the replayer * knows how to skip over them. */ if ((int)privileged_untraced_syscall3(SYS_ioctl, desched_counter_fd, PERF_EVENT_IOC_ENABLE, 0)) { fatal("Failed to ENABLE counter %d", desched_counter_fd); } } static void disarm_desched_event(void) { /* See above. */ if ((int)privileged_untraced_syscall3(SYS_ioctl, desched_counter_fd, PERF_EVENT_IOC_DISABLE, 0)) { fatal("Failed to DISABLE counter %d", desched_counter_fd); } } /** * Return 1 if it's ok to proceed with buffering this system call. * Return 0 if we should trace the system call. * This must be checked before proceeding with the buffered system call. */ /* (Negative numbers so as to not be valid syscall numbers, in case * the |int| arguments below are passed in the wrong order.) */ enum { MAY_BLOCK = -1, WONT_BLOCK = -2 }; static int start_commit_buffered_syscall(int syscallno, void* record_end, int blockness) { void* record_start; void* stored_end; struct syscallbuf_record* rec; if (!buffer) { return 0; } record_start = buffer_last(); stored_end = record_start + stored_record_size(record_end - record_start); rec = record_start; if (stored_end < record_start + sizeof(struct syscallbuf_record)) { /* Either a catastrophic buffer overflow or * we failed to lock the buffer. Just bail out. */ return 0; } if (stored_end > (void*)buffer_end() - sizeof(struct syscallbuf_record)) { /* Buffer overflow. 
* Unlock the buffer and then execute the system call * with a trap to rr. Note that we reserve enough * space in the buffer for the next prep_syscall(). */ buffer_hdr()->locked = 0; return 0; } /* Store this breadcrumb so that the tracer can find out what * syscall we're executing if our registers are in a weird * state. If we end up aborting this syscall, no worry, this * will just be overwritten later. * * NBB: this *MUST* be set before the desched event is * armed. */ rec->syscallno = syscallno; rec->desched = MAY_BLOCK == blockness; rec->size = record_end - record_start; if (rec->desched) { /* NB: the ordering of the next two statements is * important. * * We set this flag to notify rr that it should pay * attention to desched signals pending for this task. * We have to set it *before* we arm the notification * because we can't set the flag atomically with * arming the event (too bad there's no ioctl() for * querying the event enabled-ness state). That's * important because if the notification is armed, * then rr must be confident that when it disarms the * event, the tracee is at an execution point that * *must not* need the desched event. * * If we were to set the flag non-atomically after the * event was armed, then if a desched signal was * delivered right at the instruction that set the * flag, rr wouldn't know that it needed to advance * the tracee to the untraced syscall entry point. * (And if rr didn't do /that/, then the syscall might * block without rr knowing it, and the recording * session would deadlock.) */ buffer_hdr()->desched_signal_may_be_relevant = 1; arm_desched_event(); } return 1; } /** * Commit the record for a buffered system call. record_end can be * adjusted downward from what was passed to * start_commit_buffered_syscall, if not all of the initially * requested space is needed. The result of this function should be * returned directly by the kernel syscall hook. */ static long commit_raw_syscall(int syscallno, void* record_end, long ret) { void* record_start = buffer_last(); struct syscallbuf_record* rec = record_start; struct syscallbuf_hdr* hdr = buffer_hdr(); void (*breakpoint_function)(void) = 0; assert(record_end >= record_start); rec->size = record_end - record_start; assert(buffer_hdr()->locked); /* NB: the ordering of this statement with the * |disarm_desched_event()| call below is important. * * We clear this flag to notify rr that the may-block syscall * has finished, so there's no danger of blocking anymore. * (And thus the desched signal is no longer relevant.) We * have to clear this *before* disarming the event, because if * rr sees the flag set, it has to PTRACE_SYSCALL this task to * ensure it reaches an execution point where the desched * signal is no longer relevant. We have to use the ioctl() * that disarms the event as a safe "backstop" that can be hit * by the PTRACE_SYSCALL. * * If we were to clear the flag *after* disarming the event, * and the signal arrived at the instruction that cleared the * flag, and rr issued the PTRACE_SYSCALL, then this tracee * could fly off to any unknown execution point, including an * iloop. So the recording session could livelock. */ hdr->desched_signal_may_be_relevant = 0; if (rec->syscallno != syscallno) { fatal("Record is for %d but trying to commit %d", rec->syscallno, syscallno); } if (hdr->abort_commit) { /* We were descheduled in the middle of a may-block * syscall, and it was recorded as a normal entry/exit * pair. So don't record the syscall in the buffer or * replay will go haywire. 
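 * (Concretely: the abort path below never advances num_rec_bytes, so
 * the partially-written record is simply dropped from the buffer.)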
*/ hdr->abort_commit = 0; /* Clear the return value that rr puts there during replay */ rec->ret = 0; } else { int breakpoint_entry_size = &_breakpoint_table_entry_end - &_breakpoint_table_entry_start; rec->ret = ret; // Finish 'rec' first before updating num_rec_bytes, since // rr might read the record anytime after this update. hdr->num_rec_bytes += stored_record_size(rec->size); breakpoint_function = (void*)(&_breakpoint_table_entry_start + (hdr->num_rec_bytes / 8) * breakpoint_entry_size); } if (rec->desched) { disarm_desched_event(); } /* NBB: for may-block syscalls that are descheduled, the * tracer uses the previous ioctl() as a stable point to reset * the record counter. Therefore nothing from here on in the * current txn must touch the record counter (at least, must * not assume it's unchanged). */ buffer_hdr()->locked = 0; if (breakpoint_function) { /* Call the breakpoint function corresponding to the record we just * committed. This function just returns, but during replay it gives rr * a chance to set a breakpoint for when a specific syscallbuf record * has been processed. */ breakpoint_function(); } return ret; } /** * |ret_size| is the result of a syscall indicating how much data was returned * in scratch buffer |buf2|; this function copies that data to |buf| and returns * a pointer to the end of it. If there is no scratch buffer (|buf2| is NULL) * just returns |ptr|. */ static void* copy_output_buffer(int ret_size, void* ptr, void* buf, void* buf2) { if (!buf2) { return ptr; } if (ret_size <= 0) { return buf2; } local_memcpy(buf, buf2, ret_size); return buf2 + ret_size; } /** * Copy an input parameter to the syscallbuf where the kernel needs to * read and write it. During replay, we do a no-op self-copy in the buffer * so that the buffered data is not lost. * This code is written in assembler to ensure that the registers that receive * values differing between record and replay (%0, rsi/esi, and flags) * are reset to values that are the same between record and replay immediately * afterward. This guards against diverging register values leaking into * later code. * Use local_memcpy or plain assignment instead if the kernel is not going to * overwrite the values. */ static void memcpy_input_parameter(void* buf, void* src, int size) { #if defined(__i386__) || defined(__x86_64__) unsigned char tmp_in_replay = in_replay; __asm__ __volatile__("test %0,%0\n\t" "cmovne %1,%2\n\t" "rep movsb\n\t" "xor %0,%0\n\t" "xor %2,%2\n\t" : "+a"(tmp_in_replay), "+D"(buf), "+S"(src), "+c"(size) : : "cc", "memory"); #else #error Unknown architecture #endif } /** * During recording, we copy *real to *buf. * During replay, we copy *buf to *real. * Behaves like memcpy_input_parameter in terms of hiding differences between * recording and replay. */ static void copy_futex_int(uint32_t* buf, uint32_t* real) { #if defined(__i386__) || defined(__x86_64__) uint32_t tmp_in_replay = in_replay; __asm__ __volatile__("test %0,%0\n\t" "mov %2,%0\n\t" "cmovne %1,%0\n\t" "mov %0,%1\n\t" "mov %0,%2\n\t" /* This instruction is just to clear flags */ "xor %0,%0\n\t" : "+a"(tmp_in_replay) : "m"(*buf), "m"(*real) : "cc", "memory"); #else #error Unknown architecture #endif } /* Keep syscalls in alphabetical order, please.
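 *
 * Every sys_* wrapper below follows the same shape; an illustrative
 * outline (not compiled code):
 *
 *   void* ptr = prep_syscall();              // or prep_syscall_for_fd(fd)
 *   out2 = ptr; ptr += sizeof(*out2);        // reserve outparam space
 *   if (!start_commit_buffered_syscall(no, ptr, WONT_BLOCK))
 *     return traced_raw_syscall(call);       // fall back: trap to rr
 *   ret = untraced_syscallN(no, ...);        // straight to the kernel
 *   local_memcpy(out, out2, sizeof(*out));   // copy outparams back
 *   return commit_raw_syscall(no, ptr, ret);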
*/ static long sys_access(const struct syscall_info* call) { const int syscallno = SYS_access; const char* pathname = (const char*)call->args[0]; int mode = call->args[1]; void* ptr = prep_syscall(); long ret; assert(syscallno == call->no); if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall2(syscallno, pathname, mode); return commit_raw_syscall(syscallno, ptr, ret); } static long sys_clock_gettime(const struct syscall_info* call) { const int syscallno = SYS_clock_gettime; clockid_t clk_id = (clockid_t)call->args[0]; struct timespec* tp = (struct timespec*)call->args[1]; void* ptr = prep_syscall(); struct timespec* tp2 = NULL; long ret; assert(syscallno == call->no); if (tp) { tp2 = ptr; ptr += sizeof(*tp2); } if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall2(syscallno, clk_id, tp2); if (tp) { local_memcpy(tp, tp2, sizeof(*tp)); } return commit_raw_syscall(syscallno, ptr, ret); } static long sys_close(const struct syscall_info* call) { const int syscallno = SYS_close; int fd = call->args[0]; void* ptr = prep_syscall_for_fd(fd); long ret; if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall1(syscallno, fd); return commit_raw_syscall(syscallno, ptr, ret); } static long sys_open(const struct syscall_info* call); static long sys_creat(const struct syscall_info* call) { const char* pathname = (const char*)call->args[0]; mode_t mode = call->args[1]; /* Thus sayeth the man page: * * creat() is equivalent to open() with flags equal to * O_CREAT|O_WRONLY|O_TRUNC. */ struct syscall_info open_call; open_call.no = SYS_open; open_call.args[0] = (long)pathname; open_call.args[1] = O_CREAT | O_TRUNC | O_WRONLY; open_call.args[2] = mode; return sys_open(&open_call); } static int sys_fcntl64_no_outparams(const struct syscall_info* call) { const int syscallno = RR_FCNTL_SYSCALL; int fd = call->args[0]; int cmd = call->args[1]; long arg = call->args[2]; /* None of the no-outparam fcntl's are known to be * may-block. */ void* ptr = prep_syscall_for_fd(fd); long ret; assert(syscallno == call->no); if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall3(syscallno, fd, cmd, arg); return commit_raw_syscall(syscallno, ptr, ret); } static int sys_fcntl64_own_ex(const struct syscall_info* call) { const int syscallno = RR_FCNTL_SYSCALL; int fd = call->args[0]; int cmd = call->args[1]; struct f_owner_ex* owner = (struct f_owner_ex*)call->args[2]; /* The OWN_EX fcntl's aren't may-block. 
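 *
 * (The in/out-parameter pattern used below, in sketch form:
 * memcpy_input_parameter() copies |owner| into scratch space before
 * the call, and local_memcpy() copies the kernel's result back
 * afterward. Compare sys_fcntl64_no_outparams() above, which needs
 * neither copy.)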
*/ void* ptr = prep_syscall_for_fd(fd); struct f_owner_ex* owner2 = NULL; long ret; assert(syscallno == call->no); if (owner) { owner2 = ptr; ptr += sizeof(*owner2); } if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } if (owner2) { memcpy_input_parameter(owner2, owner, sizeof(*owner2)); } ret = untraced_syscall3(syscallno, fd, cmd, owner2); if (owner2) { local_memcpy(owner, owner2, sizeof(*owner)); } return commit_raw_syscall(syscallno, ptr, ret); } static int sys_fcntl64_xlk64(const struct syscall_info* call) { const int syscallno = RR_FCNTL_SYSCALL; int fd = call->args[0]; int cmd = call->args[1]; struct flock64* lock = (struct flock64*)call->args[2]; void* ptr = prep_syscall_for_fd(fd); struct flock64* lock2 = NULL; long ret; assert(syscallno == call->no); if (lock) { lock2 = ptr; ptr += sizeof(*lock2); } if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } if (lock2) { memcpy_input_parameter(lock2, lock, sizeof(*lock2)); } ret = untraced_syscall3(syscallno, fd, cmd, lock2); if (lock2) { local_memcpy(lock, lock2, sizeof(*lock)); } return commit_raw_syscall(syscallno, ptr, ret); } #if defined(SYS_fcntl64) static long sys_fcntl64(const struct syscall_info* call) #else static long sys_fcntl(const struct syscall_info* call) #endif { switch (call->args[1]) { case F_DUPFD: case F_GETFD: case F_GETFL: case F_GETOWN: case F_SETFL: case F_SETFD: case F_SETOWN: case F_SETSIG: return sys_fcntl64_no_outparams(call); case F_GETOWN_EX: case F_SETOWN_EX: return sys_fcntl64_own_ex(call); #if F_SETLK != F_SETLK64 case F_SETLK64: #else case F_SETLK: #endif return sys_fcntl64_xlk64(call); case F_GETLK: #if F_SETLK != F_SETLK64 case F_SETLK: #endif case F_SETLKW: #if F_GETLK != F_GETLK64 case F_GETLK64: #endif #if F_SETLKW != F_SETLKW64 case F_SETLKW64: #endif /* TODO: buffer the F_*LK API. */ /* fall through */ default: return traced_raw_syscall(call); } } static long sys_safe_nonblocking_ioctl(const struct syscall_info* call) { const int syscallno = SYS_ioctl; int fd = call->args[0]; void* ptr = prep_syscall_for_fd(fd); long ret; if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall2(syscallno, fd, call->args[1]); return commit_raw_syscall(syscallno, ptr, ret); } static long sys_ioctl(const struct syscall_info* call) { switch (call->args[1]) { case FIOCLEX: case FIONCLEX: return sys_safe_nonblocking_ioctl(call); default: return traced_raw_syscall(call); } } static long sys_futex(const struct syscall_info* call) { enum { FUTEX_USES_UADDR2 = 1 << 0, }; int op = call->args[1]; int flags = 0; switch (FUTEX_CMD_MASK & op) { case FUTEX_WAKE: break; case FUTEX_CMP_REQUEUE: case FUTEX_WAKE_OP: flags |= FUTEX_USES_UADDR2; break; /* It turns out not to be worth buffering the FUTEX_WAIT* * calls. When a WAIT call is made, we know almost for sure * that the tracee is going to be desched'd (otherwise the * userspace CAS would have succeeded). This is unlike * read/write, f.e., where the vast majority of calls aren't * desched'd and the overhead is worth it. So all that * buffering WAIT does is add the overhead of arming/disarming * desched (which is a measurable perf loss). * * NB: don't ever try to buffer FUTEX_LOCK_PI; it requires * special processing in the tracer process (in addition to * not being worth doing for perf reasons). 
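 *
 * (For context, a sketch of the usual userspace lock fast path; this
 * is application code, not rr code, and atomic_cas is a stand-in for
 * whatever CAS primitive the application uses:
 *
 *   if (atomic_cas(&lock->val, UNLOCKED, LOCKED)) {
 *     return; // fast path: no syscall at all
 *   }
 *   syscall(SYS_futex, &lock->val, FUTEX_WAIT, LOCKED, NULL, NULL, 0);
 *
 * By the time FUTEX_WAIT is actually issued, the CAS has already
 * failed, so the tracee is almost certain to block and be desched'd.)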
*/
    default:
      return traced_raw_syscall(call);
  }

  const int syscallno = SYS_futex;
  uint32_t* uaddr = (uint32_t*)call->args[0];
  uint32_t val = call->args[2];
  const struct timespec* timeout = (const struct timespec*)call->args[3];
  uint32_t* uaddr2 = (uint32_t*)call->args[4];
  uint32_t val3 = call->args[5];

  void* ptr = prep_syscall();
  uint32_t* saved_uaddr;
  uint32_t* saved_uaddr2 = NULL;
  long ret;

  assert(syscallno == call->no);

  /* We have to record the value of the futex at kernel exit,
   * but we can't substitute a scratch pointer for the uaddrs:
   * the futex identity is the memory cell. There are schemes
   * that would allow us to use scratch futexes, but they get
   * complicated quickly. */
  saved_uaddr = ptr;
  ptr += sizeof(*saved_uaddr);
  if (FUTEX_USES_UADDR2 & flags) {
    saved_uaddr2 = ptr;
    ptr += sizeof(*saved_uaddr2);
  }
  /* See above; it's not worth buffering may-block futex
   * calls. */
  if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
    return traced_raw_syscall(call);
  }

  ret = untraced_syscall6(syscallno, uaddr, op, val, timeout, uaddr2, val3);
  /* During recording, save the real outparams to the buffer.
   * During replay, save the values from the buffer to the real outparams.
   *
   * The *ONLY* reason it's correct for us to read the outparams
   * carelessly is that rr protects this syscallbuf
   * transaction as a critical section. */
  copy_futex_int(saved_uaddr, uaddr);
  if (saved_uaddr2) {
    copy_futex_int(saved_uaddr2, uaddr2);
  }
  return commit_raw_syscall(syscallno, ptr, ret);
}

static long sys_gettimeofday(const struct syscall_info* call) {
  const int syscallno = SYS_gettimeofday;
  struct timeval* tp = (struct timeval*)call->args[0];
  struct timezone* tzp = (struct timezone*)call->args[1];

  /* XXX it seems odd that clock_gettime() is spec'd to be
   * async-signal-safe while gettimeofday() isn't, but that's
   * what the docs say!
*/ void* ptr = prep_syscall(); struct timeval* tp2 = NULL; struct timezone* tzp2 = NULL; long ret; assert(syscallno == call->no); if (tp) { tp2 = ptr; ptr += sizeof(*tp2); } if (tzp) { tzp2 = ptr; ptr += sizeof(*tzp2); } if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall2(syscallno, tp2, tzp2); if (tp) { local_memcpy(tp, tp2, sizeof(*tp)); } if (tzp) { local_memcpy(tzp, tzp2, sizeof(*tzp)); } return commit_raw_syscall(syscallno, ptr, ret); } #if defined(SYS__llseek) static long sys__llseek(const struct syscall_info* call) { const int syscallno = SYS__llseek; int fd = call->args[0]; unsigned long offset_high = call->args[1]; unsigned long offset_low = call->args[2]; loff_t* result = (loff_t*)call->args[3]; unsigned int whence = call->args[4]; void* ptr = prep_syscall_for_fd(fd); loff_t* result2 = NULL; long ret; assert(syscallno == call->no); if (result) { result2 = ptr; ptr += sizeof(*result2); } if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } if (result2) { memcpy_input_parameter(result2, result, sizeof(*result2)); } ret = untraced_syscall5(syscallno, fd, offset_high, offset_low, result2, whence); if (result2) { *result = *result2; } return commit_raw_syscall(syscallno, ptr, ret); } #else static long sys_lseek(const struct syscall_info* call) { const int syscallno = SYS_lseek; int fd = call->args[0]; off_t off = call->args[1]; int whence = call->args[2]; void* ptr = prep_syscall_for_fd(fd); off_t ret = 0; assert(syscallno == call->no); if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall3(syscallno, fd, off, whence); return commit_raw_syscall(syscallno, ptr, ret); } #endif static long sys_madvise(const struct syscall_info* call) { const int syscallno = SYS_madvise; void* addr = (void*)call->args[0]; size_t length = call->args[1]; int advice = call->args[2]; void* ptr; long ret; switch (advice) { case MADV_DOFORK: case MADV_DONTFORK: case MADV_REMOVE: return traced_raw_syscall(call); } ptr = prep_syscall(); assert(syscallno == call->no); if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } /* Ensure this syscall happens during replay. In particular MADV_DONTNEED * must be executed. */ ret = untraced_replayed_syscall3(syscallno, addr, length, advice); return commit_raw_syscall(syscallno, ptr, ret); } static long sys_open(const struct syscall_info* call) { const int syscallno = SYS_open; const char* pathname = (const char*)call->args[0]; int flags = call->args[1]; mode_t mode = call->args[2]; /* NB: not arming the desched event is technically correct, * since open() can't deadlock if it blocks. However, not * allowing descheds here may cause performance issues if the * open does block for a while. Err on the side of simplicity * until we have perf data. */ void* ptr; long ret; assert(syscallno == call->no); /* The strcmp() done here is OK because we're not in the * critical section yet. */ if (is_blacklisted_filename(pathname)) { /* Would be nice to debug() here, but that would flush * the syscallbuf ... This special bail-out case is * deterministic, so no need to save any breadcrumbs * in the syscallbuf. 
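 *
 * (Returning -ENOENT below makes the open fail identically during
 * recording and replay, with no record needed; see
 * is_blacklisted_filename() in preload_interface.h for the refused
 * paths.)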
*/ return -ENOENT; } ptr = prep_syscall(); if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall3(syscallno, pathname, flags, mode); return commit_raw_syscall(syscallno, ptr, ret); } /** * Make this function external so desched_ticks.py can set a breakpoint on it. */ void __before_poll_syscall_breakpoint(void) {} static long sys_poll(const struct syscall_info* call) { const int syscallno = SYS_poll; struct pollfd* fds = (struct pollfd*)call->args[0]; unsigned int nfds = call->args[1]; int timeout = call->args[2]; void* ptr = prep_syscall(); struct pollfd* fds2 = NULL; long ret; assert(syscallno == call->no); if (fds && nfds > 0) { fds2 = ptr; ptr += nfds * sizeof(*fds2); } if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) { return traced_raw_syscall(call); } if (fds2) { memcpy_input_parameter(fds2, fds, nfds * sizeof(*fds2)); } __before_poll_syscall_breakpoint(); ret = untraced_syscall3(syscallno, fds2, nfds, timeout); if (fds2 && ret >= 0) { /* NB: even when poll returns 0 indicating no pending * fds, it still sets each .revent outparam to 0. * (Reasonably.) So we always need to copy on return * value >= 0. * It's important that we not copy when there's an error. * The syscallbuf commit might have been aborted, which means * during replay fds2 might be non-recorded data, so we'd be * incorrectly trashing 'fds'. */ local_memcpy(fds, fds2, nfds * sizeof(*fds)); } return commit_raw_syscall(syscallno, ptr, ret); } static long sys_read(const struct syscall_info* call) { const int syscallno = SYS_read; int fd = call->args[0]; void* buf = (void*)call->args[1]; size_t count = call->args[2]; void* ptr = prep_syscall_for_fd(fd); void* buf2 = NULL; long ret; assert(syscallno == call->no); if (buf && count > 0) { buf2 = ptr; ptr += count; } if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall3(syscallno, fd, buf2, count); ptr = copy_output_buffer(ret, ptr, buf, buf2); return commit_raw_syscall(syscallno, ptr, ret); } static long sys_readlink(const struct syscall_info* call) { const int syscallno = SYS_readlink; const char* path = (const char*)call->args[0]; char* buf = (char*)call->args[1]; int bufsiz = call->args[2]; void* ptr = prep_syscall(); char* buf2 = NULL; long ret; assert(syscallno == call->no); if (buf && bufsiz > 0) { buf2 = ptr; ptr += bufsiz; } if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall3(syscallno, path, buf2, bufsiz); ptr = copy_output_buffer(ret, ptr, buf, buf2); return commit_raw_syscall(syscallno, ptr, ret); } #if defined(SYS_socketcall) static long sys_socketcall_recv(const struct syscall_info* call) { const int syscallno = SYS_socketcall; long* args = (long*)call->args[1]; int sockfd = args[0]; void* buf = (void*)args[1]; size_t len = args[2]; unsigned int flags = args[3]; unsigned long new_args[4]; void* ptr = prep_syscall_for_fd(sockfd); void* buf2 = NULL; long ret; assert(syscallno == call->no); if (buf && len > 0) { buf2 = ptr; ptr += len; } if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) { return traced_raw_syscall(call); } new_args[0] = sockfd; new_args[1] = (unsigned long)buf2; new_args[2] = len; new_args[3] = flags; ret = untraced_syscall2(SYS_socketcall, SYS_RECV, new_args); ptr = copy_output_buffer(ret, ptr, buf, buf2); return commit_raw_syscall(syscallno, ptr, ret); } static long sys_socketcall(const struct syscall_info* call) 
{ switch (call->args[0]) { case SYS_RECV: return sys_socketcall_recv(call); default: return traced_raw_syscall(call); } } #endif #ifdef SYS_recvfrom static long sys_recvfrom(const struct syscall_info* call) { const int syscallno = SYS_recvfrom; int sockfd = call->args[0]; void* buf = (void*)call->args[1]; size_t len = call->args[2]; int flags = call->args[3]; struct sockaddr* src_addr = (struct sockaddr*)call->args[4]; socklen_t* addrlen = (socklen_t*)call->args[5]; void* ptr = prep_syscall_for_fd(sockfd); void* buf2 = NULL; struct sockaddr* src_addr2 = NULL; socklen_t* addrlen2 = NULL; long ret; assert(syscallno == call->no); if (src_addr) { src_addr2 = ptr; ptr += sizeof(*src_addr); } if (addrlen) { addrlen2 = ptr; ptr += sizeof(*addrlen); } if (buf && len > 0) { buf2 = ptr; ptr += len; } if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) { return traced_raw_syscall(call); } if (addrlen) { memcpy_input_parameter(addrlen2, addrlen, sizeof(*addrlen2)); } ret = untraced_syscall6(syscallno, sockfd, buf2, len, flags, src_addr2, addrlen2); if (ret >= 0) { if (src_addr2) { local_memcpy(src_addr, src_addr2, sizeof(*src_addr)); } if (addrlen2) { *addrlen = *addrlen2; } } ptr = copy_output_buffer(ret, ptr, buf, buf2); return commit_raw_syscall(syscallno, ptr, ret); } #endif #ifdef SYS_recvmsg static long sys_recvmsg(const struct syscall_info* call) { const int syscallno = SYS_recvmsg; int sockfd = call->args[0]; struct msghdr* msg = (struct msghdr*)call->args[1]; int flags = call->args[2]; void* ptr = prep_syscall_for_fd(sockfd); long ret; struct msghdr* msg2; void* ptr_base = ptr; void* ptr_overwritten_end; void* ptr_bytes_start; void* ptr_end; size_t i; assert(syscallno == call->no); /* Compute final buffer size up front, before writing syscall inputs to the * buffer. Thus if we decide not to buffer this syscall, we bail out * before trying to write to a buffer that won't be recorded and may be * invalid (e.g. overflow). */ ptr += sizeof(struct msghdr) + sizeof(struct iovec) * msg->msg_iovlen; if (msg->msg_name) { ptr += msg->msg_namelen; } if (msg->msg_control) { ptr += msg->msg_controllen; } for (i = 0; i < msg->msg_iovlen; ++i) { ptr += msg->msg_iov[i].iov_len; } if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) { return traced_raw_syscall(call); } /** * The kernel only writes to the struct msghdr, and the iov buffers. We must * not overwrite that data (except using memcpy_input_parameter) during * replay. For the rest of the data, the values we write here during replay * are guaranteed to match what was recorded in the buffer. * We can't rely on the values we wrote here during recording also being * here during replay since the syscall might have been aborted and our * written data not recorded. 
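 *
 * The scratch area assembled below is laid out like this (sketch):
 *
 *   [struct msghdr][iovec array][msg_name][msg_control][iov data...]
 *   ^msg2 = ptr_base            ^          ^ptr_bytes_start
 *                               ptr_overwritten_end
 *
 * Only the msghdr and the iovec array (everything before
 * ptr_overwritten_end) are written by us before the syscall; the
 * regions after it are written by the kernel.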
*/
  msg2 = ptr = ptr_base;
  memcpy_input_parameter(msg2, msg, sizeof(*msg));
  ptr += sizeof(struct msghdr);
  msg2->msg_iov = ptr;
  ptr += sizeof(struct iovec) * msg->msg_iovlen;
  ptr_overwritten_end = ptr;
  if (msg->msg_name) {
    msg2->msg_name = ptr;
    ptr += msg->msg_namelen;
  }
  if (msg->msg_control) {
    msg2->msg_control = ptr;
    ptr += msg->msg_controllen;
  }
  ptr_bytes_start = ptr;
  for (i = 0; i < msg->msg_iovlen; ++i) {
    msg2->msg_iov[i].iov_base = ptr;
    ptr += msg->msg_iov[i].iov_len;
    msg2->msg_iov[i].iov_len = msg->msg_iov[i].iov_len;
  }

  ret = untraced_syscall3(syscallno, sockfd, msg2, flags);
  if (ret >= 0) {
    size_t bytes = ret;
    size_t i;
    if (msg->msg_name) {
      local_memcpy(msg->msg_name, msg2->msg_name, msg2->msg_namelen);
    }
    msg->msg_namelen = msg2->msg_namelen;
    if (msg->msg_control) {
      local_memcpy(msg->msg_control, msg2->msg_control, msg2->msg_controllen);
    }
    msg->msg_controllen = msg2->msg_controllen;
    ptr_end = ptr_bytes_start + bytes;
    for (i = 0; i < msg->msg_iovlen; ++i) {
      long copy_bytes =
          bytes < msg->msg_iov[i].iov_len ? bytes : msg->msg_iov[i].iov_len;
      local_memcpy(msg->msg_iov[i].iov_base, msg2->msg_iov[i].iov_base,
                   copy_bytes);
      bytes -= copy_bytes;
    }
    msg->msg_flags = msg2->msg_flags;
  } else {
    /* Allocate record space at least to cover the data we overwrote above.
     * We don't want to start the next record overlapping that data, since then
     * we'll corrupt it during replay. */
    ptr_end = ptr_overwritten_end;
  }
  return commit_raw_syscall(syscallno, ptr_end, ret);
}
#endif

static long sys_time(const struct syscall_info* call) {
  const int syscallno = SYS_time;
  time_t* tp = (time_t*)call->args[0];

  void* ptr = prep_syscall();
  long ret;

  assert(syscallno == call->no);

  if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) {
    return traced_raw_syscall(call);
  }
  ret = untraced_syscall1(syscallno, NULL);
  if (tp) {
    /* No error is possible here. */
    *tp = ret;
  }
  return commit_raw_syscall(syscallno, ptr, ret);
}

static long sys_xstat64(const struct syscall_info* call) {
  const int syscallno = call->no;
  /* NB: this arg may be a string or an fd, but for the purposes
   * of this generic helper we don't care. */
  long what = call->args[0];
  struct stat64* buf = (struct stat64*)call->args[1];

  /* Like open(), not arming the desched event because it's not
   * needed for correctness, and there are no data to suggest
   * whether it's a good idea perf-wise.
*/ void* ptr = prep_syscall(); struct stat64* buf2 = NULL; long ret; if (buf) { buf2 = ptr; ptr += sizeof(*buf2); } if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall2(syscallno, what, buf2); if (buf2) { local_memcpy(buf, buf2, sizeof(*buf)); } return commit_raw_syscall(syscallno, ptr, ret); } static long sys_write(const struct syscall_info* call) { const int syscallno = SYS_write; int fd = call->args[0]; const void* buf = (const void*)call->args[1]; size_t count = call->args[2]; void* ptr = prep_syscall_for_fd(fd); long ret; assert(syscallno == call->no); if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall3(syscallno, fd, buf, count); return commit_raw_syscall(syscallno, ptr, ret); } static long sys_writev(const struct syscall_info* call) { int syscallno = SYS_writev; int fd = call->args[0]; const struct iovec* iov = (const struct iovec*)call->args[1]; unsigned long iovcnt = call->args[2]; void* ptr = prep_syscall_for_fd(fd); long ret; assert(syscallno == call->no); if (!start_commit_buffered_syscall(syscallno, ptr, MAY_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall3(syscallno, fd, iov, iovcnt); return commit_raw_syscall(syscallno, ptr, ret); } static long sys_gettid(const struct syscall_info* call) { const int syscallno = SYS_gettid; void* ptr = prep_syscall(); long ret; assert(syscallno == call->no); if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall0(syscallno); return commit_raw_syscall(syscallno, ptr, ret); } static long sys_getpid(const struct syscall_info* call) { const int syscallno = SYS_getpid; void* ptr = prep_syscall(); long ret; assert(syscallno == call->no); if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall0(syscallno); return commit_raw_syscall(syscallno, ptr, ret); } static long sys_getrusage(const struct syscall_info* call) { const int syscallno = SYS_getrusage; int who = (int)call->args[0]; struct rusage* buf = (struct rusage*)call->args[1]; void* ptr = prep_syscall(); long ret; struct rusage* buf2 = NULL; assert(syscallno == call->no); if (buf) { buf2 = ptr; ptr += sizeof(struct rusage); } if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { return traced_raw_syscall(call); } ret = untraced_syscall2(syscallno, who, buf2); if (buf2 && ret >= 0) { local_memcpy(buf, buf2, sizeof(*buf)); } return commit_raw_syscall(syscallno, ptr, ret); } static long syscall_hook_internal(const struct syscall_info* call) { switch (call->no) { #define CASE(syscallname) \ case SYS_##syscallname: \ return sys_##syscallname(call) CASE(access); CASE(clock_gettime); CASE(close); CASE(creat); #if defined(SYS_fcntl64) CASE(fcntl64); #else CASE(fcntl); #endif CASE(futex); CASE(getpid); CASE(getrusage); CASE(gettid); CASE(gettimeofday); CASE(ioctl); #if defined(SYS__llseek) CASE(_llseek); #else CASE(lseek); #endif CASE(madvise); CASE(open); CASE(poll); CASE(read); CASE(readlink); #if defined(SYS_recvmsg) CASE(recvmsg); #endif #if defined(SYS_recvfrom) CASE(recvfrom); #endif #if defined(SYS_socketcall) CASE(socketcall); #endif CASE(time); CASE(write); CASE(writev); #undef CASE #if defined(SYS_fstat64) case SYS_fstat64: #else case SYS_fstat: #endif #if defined(SYS_lstat64) case SYS_lstat64: #else case SYS_lstat: #endif #if defined(SYS_stat64) case SYS_stat64: #else case SYS_stat: 
#endif
      return sys_xstat64(call);
    default:
      return traced_raw_syscall(call);
  }
}

/* Explicitly declare this as hidden so we can call it from
 * _syscall_hook_trampoline without doing all sorts of special PIC handling.
 */
RR_HIDDEN long syscall_hook(const struct syscall_info* call) {
  long result = syscall_hook_internal(call);
  if (buffer_hdr() && buffer_hdr()->notify_on_syscall_hook_exit) {
    // SYS_rrcall_notify_syscall_hook_exit will clear
    // notify_on_syscall_hook_exit. Clearing it ourselves is tricky to get
    // right without races.
    //
    // During recording, this flag is set when the recorder needs to delay
    // delivery of a signal until we've stopped using the syscallbuf.
    // During replay, this flag is set when the next event is entering a
    // SYS_rrcall_notify_syscall_hook_exit.
    //
    // The correctness argument is as follows:
    // Correctness requires that a) replay's setting of the flag happens before
    // we read the flag in the call to syscall_hook that triggered the
    // SYS_rrcall_notify_syscall_hook_exit and b) replay's setting of the flag
    // must happen after we read the flag in the previous execution of
    // syscall_hook.
    // Condition a) holds as long as no events are recorded between the
    // checking of the flag above and the execution of this syscall. This
    // should be the case; no synchronous signals or syscalls are
    // triggerable, all async signals other than SYSCALLBUF_DESCHED_SIGNAL
    // are delayed, and SYSCALLBUF_DESCHED_SIGNAL shouldn't fire since we've
    // disarmed the desched fd at this point. SYSCALLBUF_FLUSH events may be
    // emitted when we process the SYS_rrcall_notify_syscall_hook_exit event,
    // but replay of those events ends at the last flushed syscall, before
    // we exit syscall_hook_internal.
    // Condition b) failing would mean no new events were generated between
    // testing the flag in the previous syscall_hook and the execution of this
    // SYS_rrcall_notify_syscall_hook_exit. However, every invocation of
    // syscall_hook_internal generates either a traced syscall or a syscallbuf
    // record that would be flushed by SYSCALLBUF_FLUSH, so that can't
    // happen.
    //
    // Another crazy thing is going on here: it's possible that a signal
    // intended to be delivered
    result = _raw_syscall(
        SYS_rrcall_notify_syscall_hook_exit, call->args[0], call->args[1],
        call->args[2], call->args[3], call->args[4], call->args[5],
        privileged_traced_syscall_instruction, result, call->no);
  }
  return result;
}

/**
 * Exported glibc synonym for |sysconf()|. We can't use |dlsym()| to
 * resolve the next "sysconf" symbol, because
 *  - dlsym usually calls malloc()
 *  - custom allocators like jemalloc may use sysconf()
 *  - if our sysconf wrapper is re-entered during initialization, it
 *    has nothing to fall back on to get the conf name, and chaos will
 *    likely ensue if we return something random.
 */
long __sysconf(int name);

/**
 * Pretend that only 1 processor is configured/online, because rr
 * binds all tracees to one logical CPU.
 */
long sysconf(int name) {
  switch (name) {
    case _SC_NPROCESSORS_ONLN:
    case _SC_NPROCESSORS_CONF:
      return 1;
  }
  return __sysconf(name);
}

/** Disable XShm since rr doesn't work with it */
int XShmQueryExtension(__attribute__((unused)) void* dpy) { return 0; }

/** Make sure XShmCreateImage returns null in case an application doesn't do
    extension checks first.
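    A well-behaved client does something like this (sketch):

      if (XShmQueryExtension(dpy)) {
        img = XShmCreateImage(dpy, ...);
      }

    Our XShmQueryExtension() above returns 0, so such clients fall back
    to the non-SHM path; the stub below is a backstop for clients that
    skip the check.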
*/ void* XShmCreateImage(__attribute__((unused)) register void* dpy, __attribute__((unused)) register void* visual, __attribute__((unused)) unsigned int depth, __attribute__((unused)) int format, __attribute__((unused)) char* data, __attribute__((unused)) void* shminfo, __attribute__((unused)) unsigned int width, __attribute__((unused)) unsigned int height) { return 0; } rr-4.1.0/src/preload/preload_interface.h000066400000000000000000000244421265436462100202120ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_PRELOAD_INTERFACE_H_ #define RR_PRELOAD_INTERFACE_H_ #include #include #include #ifndef RR_IMPLEMENT_PRELOAD #include "../remote_ptr.h" #endif /* This header file is included by preload.c and various rr .cc files. It * defines the interface between the preload library and rr. preload.c * #defines RR_IMPLEMENT_PRELOAD to let us handle situations where rr and * preload.c need to see slightly different definitions of the same constructs. * * preload.c compiles this as C code. All rr modules compile this as C++ code. * We do not use 'extern "C"' because we don't actually link between C and C++ * and 'extern "C"' is not compatible with our use of templates below. */ #define SYSCALLBUF_LIB_FILENAME_BASE "librrpreload" #define SYSCALLBUF_LIB_FILENAME SYSCALLBUF_LIB_FILENAME_BASE ".so" #define SYSCALLBUF_LIB_FILENAME_PADDED SYSCALLBUF_LIB_FILENAME_BASE ".so:::" #define SYSCALLBUF_LIB_FILENAME_32 SYSCALLBUF_LIB_FILENAME_BASE "_32.so" /* This is pretty arbitrary. On Linux SIGPWR is sent to PID 1 (init) on * power failure, and it's unlikely rr will be recording that. * Note that SIGUNUSED means SIGSYS which actually *is* used (by seccomp), * so we can't use it. */ #define SYSCALLBUF_DESCHED_SIGNAL SIGPWR /* This size counts the header along with record data. */ #define SYSCALLBUF_BUFFER_SIZE (1 << 20) /* Set this env var to enable syscall buffering. */ #define SYSCALLBUF_ENABLED_ENV_VAR "_RR_USE_SYSCALLBUF" /* Size of table mapping fd numbers to syscallbuf-disabled flag. * Most Linux kernels limit fds to 1024 so it probably doesn't make sense * to raise this value... */ #define SYSCALLBUF_FDS_DISABLED_SIZE 1024 #define RR_PAGE_ADDR 0x70000000 #define RR_PAGE_SYSCALL_STUB_SIZE 3 #define RR_PAGE_SYSCALL_INSTRUCTION_END 2 #define RR_PAGE_IN_TRACED_SYSCALL_ADDR \ (RR_PAGE_ADDR + RR_PAGE_SYSCALL_INSTRUCTION_END) #define RR_PAGE_IN_PRIVILEGED_TRACED_SYSCALL_ADDR \ (RR_PAGE_ADDR + RR_PAGE_SYSCALL_STUB_SIZE + RR_PAGE_SYSCALL_INSTRUCTION_END) #define RR_PAGE_IN_UNTRACED_REPLAYED_SYSCALL_ADDR \ (RR_PAGE_ADDR + RR_PAGE_SYSCALL_STUB_SIZE * 2 + \ RR_PAGE_SYSCALL_INSTRUCTION_END) #define RR_PAGE_IN_UNTRACED_SYSCALL_ADDR \ (RR_PAGE_ADDR + RR_PAGE_SYSCALL_STUB_SIZE * 3 + \ RR_PAGE_SYSCALL_INSTRUCTION_END) #define RR_PAGE_IN_PRIVILEGED_UNTRACED_SYSCALL_ADDR \ (RR_PAGE_ADDR + RR_PAGE_SYSCALL_STUB_SIZE * 4 + \ RR_PAGE_SYSCALL_INSTRUCTION_END) /* "Magic" (rr-implemented) syscalls that we use to initialize the * syscallbuf. * * NB: magic syscalls must be positive, because with at least linux * 3.8.0 / eglibc 2.17, rr only gets a trap for the *entry* of invalid * syscalls, not the exit. rr can't handle that yet. */ /* TODO: static_assert(LAST_SYSCALL < SYS_rrcall_init_buffers) */ /** * The preload library calls SYS_rrcall_init_preload during its * initialization. */ #define SYS_rrcall_init_preload 442 /** * The preload library calls SYS_rrcall_init_buffers in each thread that * gets created (including the initial main thread). 
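 *
 * (Usage sketch, with hypothetical plumbing: the new thread fills in a
 * struct rrcall_init_buffers_params, defined below, and issues
 * something like
 *
 *   traced_syscall(SYS_rrcall_init_buffers, &params);
 *
 * rr intercepts the "syscall", maps the shared syscallbuf segment, and
 * fills in the "out" fields such as syscallbuf_ptr.)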
*/
#define SYS_rrcall_init_buffers 443

/**
 * The preload library calls SYS_rrcall_notify_syscall_hook_exit when
 * unlocking the syscallbuf and notify_on_syscall_hook_exit has been set.
 * The word at 4/8(sp) is returned in the syscall result and the word at
 * 8/16(sp) is stored in original_syscallno.
 */
#define SYS_rrcall_notify_syscall_hook_exit 444

/* Define macros that let us compile a struct definition either "natively"
 * (when included by preload.c) or as a template over Arch for use by rr.
 */
#ifdef RR_IMPLEMENT_PRELOAD
#define TEMPLATE_ARCH
#define PTR(T) T *
#else
#define TEMPLATE_ARCH template <typename Arch>
#define PTR(T) typename Arch::template ptr<T>
#endif

/**
 * To support syscall buffering, we replace syscall instructions with a "call"
 * instruction that calls a hook in the preload library to handle the syscall.
 * Since the call instruction takes more space than the syscall instruction,
 * the patch replaces one or more instructions after the syscall instruction as
 * well; those instructions are folded into the tail of the hook function
 * and we have multiple hook functions, each one corresponding to an
 * instruction that follows a syscall instruction.
 * Each instance of this struct describes an instruction that can follow a
 * syscall and a hook function to patch with.
 */
struct syscall_patch_hook {
  uint8_t next_instruction_length;
  uint8_t next_instruction_bytes[6];
  uint64_t hook_address;
};

/**
 * Packs up the parameters passed to |SYS_rrcall_init_preload|.
 * We use this struct because it's a little cleaner.
 */
TEMPLATE_ARCH
struct rrcall_init_preload_params {
  /* "In" params. */
  /* The syscallbuf lib's idea of whether buffering is enabled.
   * We let the syscallbuf code decide in order to more simply
   * replay the same decision that was recorded. */
  int syscallbuf_enabled;
  int syscall_patch_hook_count;
  PTR(struct syscall_patch_hook) syscall_patch_hooks;
  PTR(void) syscall_hook_trampoline;
  PTR(void) syscall_hook_stub_buffer;
  PTR(void) syscall_hook_stub_buffer_end;
  /* Array of size SYSCALLBUF_FDS_DISABLED_SIZE */
  PTR(volatile char) syscallbuf_fds_disabled;
  /* Address of the flag which is 0 during recording and 1 during replay. */
  PTR(unsigned char) in_replay_flag;
  /* Address of the first entry of the breakpoint table.
   * After processing a syscallbuf record (and unlocking the syscallbuf),
   * we call a function in this table corresponding to the record processed.
   * rr can set a breakpoint in this table to break on the completion of a
   * particular syscallbuf record. */
  PTR(void) breakpoint_table;
  int breakpoint_table_entry_size;
};

/**
 * Packs up the inout parameters passed to |SYS_rrcall_init_buffers|.
 * We use this struct because there are too many params to pass
 * through registers on at least x86. (It's also a little cleaner.)
 */
TEMPLATE_ARCH
struct rrcall_init_buffers_params {
  /* The fd we're using to track desched events. */
  int desched_counter_fd;
  /* padding for 64-bit archs. Structs written to tracee memory must not have
   * holes! */
  int padding;

  /* "Out" params. */
  /* Returned pointer to and size of the shared syscallbuf
   * segment. */
  PTR(void) syscallbuf_ptr;
};

/**
 * The syscall buffer comprises an array of these variable-length
 * records, along with the header below.
 */
struct syscallbuf_record {
  /* Return value from the syscall. This can be a memory
   * address, so must be as big as a memory address can be.
   * We use 64 bits rather than make syscallbuf_record Arch-specific as that
   * gets cumbersome. */
  int64_t ret;
  /* Syscall number.
* * NB: the x86 linux ABI has 350 syscalls as of 3.9.6 and * x86-64 defines 313, so this is a pretty safe storage * allocation. It would be an earth-shattering event if the * syscall surface were doubled in a short period of time, and * even then we would have a comfortable cushion. Still, * * TODO: static_assert this can hold largest syscall num */ uint16_t syscallno; /* Did the tracee arm/disarm the desched notification for this * syscall? */ uint8_t desched; uint8_t _padding; /* Size of entire record in bytes: this struct plus extra * recorded data stored inline after the last field, not * including padding. * * TODO: static_assert this can repr >= buffer size */ uint32_t size; /* Extra recorded outparam data starts here. */ uint8_t extra_data[0]; }; /** * This struct summarizes the state of the syscall buffer. It happens * to be located at the start of the buffer. */ struct syscallbuf_hdr { /* The number of valid syscallbuf_record bytes in the buffer, * not counting this header. * Make this volatile so that memory writes aren't reordered around * updates to this field. */ volatile uint32_t num_rec_bytes; /* True if the current syscall should not be committed to the * buffer, for whatever reason; likely interrupted by * desched. Set by rr. */ uint8_t abort_commit; /* True if, next time we exit the syscall buffer hook, libpreload should * execute SYS_rrcall_notify_syscall_hook_exit to give rr the opportunity to * deliver a signal and/or reset the syscallbuf. */ uint8_t notify_on_syscall_hook_exit; /* This tracks whether the buffer is currently in use for a * system call. This is helpful when a signal handler runs * during a wrapped system call; we don't want it to use the * buffer for its system calls. */ uint8_t locked; /* Nonzero when rr needs to worry about the desched signal. * When it's zero, the desched signal can safely be * discarded. */ uint8_t desched_signal_may_be_relevant; struct syscallbuf_record recs[0]; } __attribute__((__packed__)); /* TODO: static_assert(sizeof(uint32_t) == * sizeof(struct syscallbuf_hdr)) */ /** * Return a pointer to what may be the next syscall record. * * THIS POINTER IS NOT GUARANTEED TO BE VALID!!! Caveat emptor. */ inline static struct syscallbuf_record* next_record( struct syscallbuf_hdr* hdr) { uintptr_t next = (uintptr_t)hdr->recs + hdr->num_rec_bytes; return (struct syscallbuf_record*)next; } /** * Return the amount of space that a record of |length| will occupy in * the buffer if committed, including padding. */ inline static long stored_record_size(size_t length) { /* Round up to a whole number of 64-bit words. */ return (length + 7) & ~7; } /** * Return nonzero if an attempted open() of |filename| should be * blocked. * * The background of this hack is that rr doesn't support DRI/DRM * currently, so we use the blunt stick of refusing to open this * interface file as a way of disabling it entirely. (In addition to * tickling xorg.conf, which doesn't entirely do the trick.) It's * known how to fix this particular, so let's not let this hack grow * too much by piling on. 
*/ inline static int is_blacklisted_filename(const char* filename) { return !strncmp("/dev/dri/", filename, 9) || !strcmp("/dev/nvidiactl", filename) || !strcmp("/usr/share/alsa/alsa.conf", filename); } #endif /* RR_PRELOAD_INTERFACE_H_ */ rr-4.1.0/src/preload/raw_syscall.S000066400000000000000000000127371265436462100170460ustar00rootroot00000000000000#if defined(__i386__) .text .globl _raw_syscall .hidden _raw_syscall .type _raw_syscall, @function _raw_syscall: /* syscallno = 4(%esp) */ .cfi_startproc pushl %ebx /* syscallno = 8(%esp) */ .cfi_adjust_cfa_offset 4 .cfi_rel_offset %ebx, 0 pushl %esi /* syscallno = 12(%esp) */ .cfi_adjust_cfa_offset 4 .cfi_rel_offset %esi, 0 pushl %edi /* syscallno = 16(%esp) */ .cfi_adjust_cfa_offset 4 .cfi_rel_offset %edi, 0 pushl %ebp /* syscallno = 20(%esp) */ .cfi_adjust_cfa_offset 4 .cfi_rel_offset %ebp, 0 movl 20(%esp), %eax /* %eax = syscallno */ movl 24(%esp), %ebx /* %ebx = a0 */ movl 28(%esp), %ecx /* %ecx = a1 */ movl 32(%esp), %edx /* %edx = a2 */ movl 36(%esp), %esi /* %esi = a3 */ movl 40(%esp), %edi /* %edi = a4 */ movl 44(%esp), %ebp /* %ebp = a5 */ pushl 56(%esp) .cfi_adjust_cfa_offset 4 pushl 56(%esp) .cfi_adjust_cfa_offset 4 call *56(%esp) addl $8,%esp .cfi_adjust_cfa_offset -8 popl %ebp .cfi_adjust_cfa_offset -4 .cfi_restore %ebp popl %edi .cfi_adjust_cfa_offset -4 .cfi_restore %edi popl %esi .cfi_adjust_cfa_offset -4 .cfi_restore %esi popl %ebx .cfi_adjust_cfa_offset -4 .cfi_restore %ebx ret .cfi_endproc .size _raw_syscall, . - _raw_syscall #elif defined(__x86_64__) .text .globl _raw_syscall .hidden _raw_syscall .type _raw_syscall, @function _raw_syscall: .cfi_startproc /* Incoming args are in %rdi, %rsi, %rdx, %rcx, %r8, %r9, and 8(%rsp). Syscall arguments are %rdi, %rsi, %rdx, %r10, %r8, %r9. */ movq %rdi, %rax /* syscall number */ movq %rsi, %rdi /* first syscall arg */ movq %rdx, %rsi /* second syscall arg */ movq %rcx, %rdx /* third syscall arg */ movq %r8, %r10 /* fourth syscall arg */ movq %r9, %r8 /* fifth syscall arg */ movq 8(%rsp), %r9 /* sixth syscall arg */ pushq 32(%rsp) .cfi_adjust_cfa_offset 8 pushq 32(%rsp) .cfi_adjust_cfa_offset 8 /* During a system call the kernel makes some user-space-visible register changes: a) on entry, %r11 is set to %rflags b) %rcx is sometimes set to -1 (perhaps because of something rr does) c) on entry or exit, some flags are sometimes changed Also, during replay we may perform single-stepping which can set TF (trace flag). We need to hide this. fixup_syscall_registers is responsible for fixing up registers to hide these effects when we get a ptrace trap from system calls in the kernel: it clears TF from %r11, forces %rcx to -1, and sets flags to fixed values (ZF+PF+IF+reserved, same as for "xor reg,reg"). Task::emulate_syscall_entry is responsible for fixing up registers when we emulate a system call that was traced during recording (by running to a breakpoint at that system call). It does the above effects after setting %r11 to %rflags. For untraced system calls there is no trap to rr during recording or replay, so we must handle these issues here. We do not need untraced system calls to behave exactly the same as traced system calls, since whether a given system call was traced or not is the same whether recording or replaying, but it's a good idea to make them as similar as possible. We do need register values to be perfectly consistent at every instruction in every replay whether or not singlestepping is used (because a ReplayTimeline::mark might be created at any point). 
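       (Worked out, for reference: the fixed flags value used below,
       0x246, is IF (0x200) + ZF (0x40) + PF (0x4) + the always-set
       reserved bit 1 (0x2) -- exactly the state that "xor reg,reg"
       leaves behind with interrupts enabled.)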
During replay, untraced syscall instructions are replaced with "xor %eax,%eax". The following code is harmless for traced syscalls (and needs to be, because traced syscalls go through here too). */ /* Set %r11 and %rcx to the values we expect them to have after the system call. Set flags to ZF+PF+IF+reserved (0x246) first. This simplifies everything. This all has to be independent of TF being set at any point during replay! But the way we're doing it here, it's trivial. */ xor %ecx,%ecx /* At this point, flags are 0x246 + possibly TF. */ movq $0x246,%r11 movq $-1,%rcx callq *32(%rsp) /* At this point, during recording we don't trust the kernel to have restored flags correctly. It probably doesn't matter, but fix it anyway. */ xor %ecx,%ecx /* At this point, the high 32 bits of %rcx are unknown. Fix that by setting to -1 to match traced syscalls. */ movq $-1,%rcx /* At this point, %r11 is always 0x246 during replay and during recording (because TF is never set during recording). Nothing to fix in %r11. */ addq $16,%rsp .cfi_adjust_cfa_offset -16 ret .cfi_endproc .size _raw_syscall, . - _raw_syscall #else #error unknown CPU architecture #endif /* __i386__/__x86_64__ */ .section .note.GNU-stack,"",@progbits .previous rr-4.1.0/src/preload/syscall_hook.S000066400000000000000000000274321265436462100172130ustar00rootroot00000000000000 /* This must match the numbers in X86SyscallStubMonkeypatch and X64SyscallStubMonkeypatch */ .set _syscall_stack_adjust,256 .global _stub_buffer .hidden _stub_buffer .global _stub_buffer_end .hidden _stub_buffer_end .global _syscall_hook_trampoline .hidden _syscall_hook_trampoline .type _syscall_hook_trampoline, @function #if defined(__i386__) /** * Jump to this hook from |__kernel_vsyscall()|, to buffer syscalls that * we otherwise couldn't wrap through LD_PRELOAD helpers. Return the * *RAW* kernel return value, not the -1/errno mandated by POSIX. * * Remember, this function runs *below* the level of libc. libc can't * know that its call to |__kernel_vsyscall()| has been re-routed to * us. */ .text _syscall_hook_trampoline: .cfi_startproc /* Build a |struct syscall_info| by pushing all the syscall * args and the number onto the stack. */ /* struct syscall_info info; */ pushl %ebp /* info.args[5] = $ebp; */ .cfi_adjust_cfa_offset 4 .cfi_rel_offset %ebp, 0 pushl %edi /* info.args[4] = $edi; */ .cfi_adjust_cfa_offset 4 .cfi_rel_offset %edi, 0 pushl %esi /* info.args[3] = $esi; */ .cfi_adjust_cfa_offset 4 .cfi_rel_offset %esi, 0 pushl %edx /* info.args[2] = $edx; */ .cfi_adjust_cfa_offset 4 .cfi_rel_offset %edx, 0 pushl %ecx /* info.args[1] = $ecx; */ .cfi_adjust_cfa_offset 4 .cfi_rel_offset %ecx, 0 pushl %ebx /* info.args[0] = $ebx; */ .cfi_adjust_cfa_offset 4 .cfi_rel_offset %ebx, 0 pushl %eax /* info.no = $eax; */ .cfi_adjust_cfa_offset 4 /* $esp points at &info. Push that pointer on the stack as * our arg for vsyscall_hook(). */ movl %esp, %ecx pushl %ecx .cfi_adjust_cfa_offset 4 call syscall_hook /* $eax = vsyscall_hook(&info); */ /* $eax is now the syscall return value. Erase the |&info| * arg and |info.no| from the stack so that we can restore the * other registers we saved. */ addl $8, %esp .cfi_adjust_cfa_offset -8 /* Contract of __kernel_vsyscall() and real syscalls is that even * callee-save registers aren't touched, so we restore everything * here. 
*/ popl %ebx .cfi_adjust_cfa_offset -4 .cfi_restore %ebx popl %ecx .cfi_adjust_cfa_offset -4 .cfi_restore %ecx popl %edx .cfi_adjust_cfa_offset -4 .cfi_restore %edx popl %esi .cfi_adjust_cfa_offset -4 .cfi_restore %esi popl %edi .cfi_adjust_cfa_offset -4 .cfi_restore %edi popl %ebp .cfi_adjust_cfa_offset -4 .cfi_restore %ebp ret .cfi_endproc .size _syscall_hook_trampoline, .-_syscall_hook_trampoline .global _syscall_hook_trampoline_3d_01_f0_ff_ff .hidden _syscall_hook_trampoline_3d_01_f0_ff_ff .type _syscall_hook_trampoline_3d_01_f0_ff_ff, @function _syscall_hook_trampoline_3d_01_f0_ff_ff: .cfi_startproc call _syscall_hook_trampoline cmpl $0xfffff001,%eax ret .cfi_endproc .size _syscall_hook_trampoline_3d_01_f0_ff_ff, .-_syscall_hook_trampoline_3d_01_f0_ff_ff .global _syscall_hook_trampoline_90_90_90 .hidden _syscall_hook_trampoline_90_90_90 .type _syscall_hook_trampoline_90_90_90, @function _syscall_hook_trampoline_90_90_90: .cfi_startproc jmp _syscall_hook_trampoline .cfi_endproc .size _syscall_hook_trampoline_90_90_90, .-_syscall_hook_trampoline_90_90_90 _stub_buffer: .rept 1000 /* Must match X86SyscallStubMonkeypatch. We reproduce it here so we can build the correct CFI unwinding info, so gdb gives good stack traces from inside the syscall hook code. */ .cfi_startproc /* Save fake return address and old sp to the stack, for gdb to use during stack unwinding. Addresses will be filled in by rr. */ movl $0x12345678,-_syscall_stack_adjust(%esp) /* Backtrace here will be invalid! */ mov %esp,(-_syscall_stack_adjust+4)(%esp) /* Backtrace here will be invalid! */ lea -_syscall_stack_adjust(%esp),%esp .cfi_rel_offset %esp,4 /* We won't be able to get complete stack traces inside the above sequence, but that's not important. What's important is that at this point: * (%esp) contains a "return address" for this stub that points back to the patch site. * (%esp+4) contains the value of %esp that will hold at the patch site, and we've emitted CFI data to indicate that. This makes gdb treat the patch site as the caller of this stub, even though no call actually happened. */ call _stub_buffer /* FAKE, filled in by rr */ /* A backtrace here will be valid since for unwinding purposes we're basically in the same state as before the call. */ lea _syscall_stack_adjust(%esp),%esp /* Backtrace here will be invalid! */ /* Jump to the return address we saved on the stack */ jmp *(-_syscall_stack_adjust)(%esp) .cfi_endproc .endr _stub_buffer_end: #elif defined(__x86_64__) .text .p2align 4 _syscall_hook_trampoline: .cfi_startproc /* Build a |struct syscall_info| on the stack by pushing the arguments and syscall number. */ pushq %r9 .cfi_adjust_cfa_offset 8 .cfi_rel_offset %r9, 0 pushq %r8 .cfi_adjust_cfa_offset 8 .cfi_rel_offset %r8, 0 pushq %r10 .cfi_adjust_cfa_offset 8 .cfi_rel_offset %r10, 0 pushq %rdx .cfi_adjust_cfa_offset 8 .cfi_rel_offset %rdx, 0 pushq %rsi .cfi_adjust_cfa_offset 8 .cfi_rel_offset %rsi, 0 pushq %rdi .cfi_adjust_cfa_offset 8 .cfi_rel_offset %rdi, 0 pushq %rax .cfi_adjust_cfa_offset 8 .cfi_rel_offset %rax, 0 /* Call our hook. */ mov %rsp, %rdi callq syscall_hook /* On entrance, we pushed the %rax, the syscall number. But we don't want to |pop %rax|, as that will overwrite our return value. Pop into %r11 instead. */ pop %r11 .cfi_adjust_cfa_offset -8 /* We don't really *need* to restore these, since the kernel could have trashed them all anyway. But it seems reasonable to do so. 
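       (For reference: the raw syscall instruction itself only clobbers
       %rax, %rcx and %r11, but syscall_hook is ordinary C code, so the
       caller-saved argument registers we pushed above really can come
       back trashed.)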
*/ pop %rdi .cfi_adjust_cfa_offset -8 .cfi_restore %rdi pop %rsi .cfi_adjust_cfa_offset -8 .cfi_restore %rsi pop %rdx .cfi_adjust_cfa_offset -8 .cfi_restore %rdx pop %r10 .cfi_adjust_cfa_offset -8 .cfi_restore %r10 pop %r8 .cfi_adjust_cfa_offset -8 .cfi_restore %r8 pop %r9 .cfi_adjust_cfa_offset -8 .cfi_restore %r9 /* ...and we're done. */ ret .cfi_endproc .size _syscall_hook_trampoline, . - _syscall_hook_trampoline .global _syscall_hook_trampoline_48_3d_01_f0_ff_ff .hidden _syscall_hook_trampoline_48_3d_01_f0_ff_ff .type _syscall_hook_trampoline_48_3d_01_f0_ff_ff, @function _syscall_hook_trampoline_48_3d_01_f0_ff_ff: .cfi_startproc callq _syscall_hook_trampoline cmpq $0xfffffffffffff001,%rax ret .cfi_endproc .size _syscall_hook_trampoline_48_3d_01_f0_ff_ff, .-_syscall_hook_trampoline_48_3d_01_f0_ff_ff .global _syscall_hook_trampoline_48_3d_00_f0_ff_ff .hidden _syscall_hook_trampoline_48_3d_00_f0_ff_ff .type _syscall_hook_trampoline_48_3d_00_f0_ff_ff, @function _syscall_hook_trampoline_48_3d_00_f0_ff_ff: .cfi_startproc callq _syscall_hook_trampoline cmpq $0xfffffffffffff000,%rax ret .cfi_endproc .size _syscall_hook_trampoline_48_3d_00_f0_ff_ff, .-_syscall_hook_trampoline_48_3d_00_f0_ff_ff .global _syscall_hook_trampoline_48_8b_3c_24 .hidden _syscall_hook_trampoline_48_8b_3c_24 .type _syscall_hook_trampoline_48_8b_3c_24, @function _syscall_hook_trampoline_48_8b_3c_24: .cfi_startproc callq _syscall_hook_trampoline /* The original instruction after the syscall is movq (%rsp),%rdi. Because we pushed a return address and shifted RSP down before reaching this point, to get the equivalent behavior we need to use this offset. */ movq (8 + _syscall_stack_adjust)(%rsp),%rdi ret .cfi_endproc .size _syscall_hook_trampoline_48_8b_3c_24, .-_syscall_hook_trampoline_48_8b_3c_24 .global _syscall_hook_trampoline_5a_5e_c3 .hidden _syscall_hook_trampoline_5a_5e_c3 .type _syscall_hook_trampoline_5a_5e_c3, @function _syscall_hook_trampoline_5a_5e_c3: .cfi_startproc callq _syscall_hook_trampoline /* The original instructions after the syscall are pop %rdx; pop %rsi; retq. */ pop %rdx /* Return address, ignored */ /* We're not returning to the dynamically generated stub, so we need to fix the stack pointer ourselves. */ add $_syscall_stack_adjust,%rsp pop %rdx pop %rsi ret .cfi_endproc .size _syscall_hook_trampoline_5a_5e_c3, .-_syscall_hook_trampoline_5a_5e_c3 .global _syscall_hook_trampoline_90_90_90 .hidden _syscall_hook_trampoline_90_90_90 .type _syscall_hook_trampoline_90_90_90, @function _syscall_hook_trampoline_90_90_90: .cfi_startproc jmp _syscall_hook_trampoline .cfi_endproc .size _syscall_hook_trampoline_90_90_90, .-_syscall_hook_trampoline_90_90_90 _stub_buffer: .rept 1000 /* Must match X64SyscallStubMonkeypatch. We reproduce it here so we can build the correct CFI unwinding info, so gdb gives good stack traces from inside the syscall hook code. */ .cfi_startproc /* Save fake return address and old sp to the stack, for gdb to use during stack unwinding. Addresses will be filled in by rr. */ movl $0x12345678,-_syscall_stack_adjust(%rsp) /* Backtrace here will be invalid! */ movl $0x12345678,(-_syscall_stack_adjust+4)(%rsp) /* Backtrace here will be invalid! */ mov %rsp,(-_syscall_stack_adjust+8)(%rsp) /* Backtrace here will be invalid! */ lea -_syscall_stack_adjust(%rsp),%rsp .cfi_rel_offset %rsp,8 /* We won't be able to get complete stack traces inside the above sequence, but that's not important. 
What's important is that at this point: * (%rsp) contains a "return address" for this stub that points back to the patch site. * (%rsp+8) contains the value of %rsp that will hold at the patch site, and we've emitted CFI data to indicate that. This makes gdb treat the patch site as the caller of this stub, even though no call actually happened. */ call _stub_buffer /* FAKE, filled in by rr */ /* A backtrace here will be valid since for unwinding purposes we're basically in the same state as before the call. */ lea _syscall_stack_adjust(%rsp),%rsp /* Backtrace here will be invalid! */ /* Jump to the return address we saved on the stack */ jmp *(-_syscall_stack_adjust)(%rsp) .cfi_endproc .endr _stub_buffer_end: #endif /* __x86_64__ */ .section .note.GNU-stack,"",@progbits rr-4.1.0/src/record_signal.cc000066400000000000000000000410171265436462100160640ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "Signal" #include "record_signal.h" #include #include #include #include #include #include #include #include #include #include #include "preload/preload_interface.h" #include "AutoRemoteSyscalls.h" #include "Flags.h" #include "kernel_metadata.h" #include "log.h" #include "PerfCounters.h" #include "RecordSession.h" #include "task.h" #include "TraceStream.h" #include "util.h" using namespace rr; using namespace std; static __inline__ unsigned long long rdtsc(void) { return __rdtsc(); } static const int STOPSIG_SYSCALL = 0x80 | SIGTRAP; template static size_t sigaction_sigset_size_arch() { return Arch::sigaction_sigset_size; } static size_t sigaction_sigset_size(SupportedArch arch) { RR_ARCH_FUNCTION(sigaction_sigset_size_arch, arch); } /** * Restore the blocked-ness and sigaction for SIGSEGV from |t|'s local * copy. */ static void restore_sigsegv_state(Task* t) { const vector& sa = t->signal_action(SIGSEGV); AutoRemoteSyscalls remote(t); { AutoRestoreMem child_sa(remote, sa.data(), sa.size()); remote.infallible_syscall(syscall_number_for_rt_sigaction(remote.arch()), SIGSEGV, child_sa.get().as_int(), nullptr, sigaction_sigset_size(remote.arch())); } // NB: we would normally want to restore the SIG_BLOCK for // SIGSEGV here, but doing so doesn't change the kernel's // "SigBlk" mask. There's no bug observed in the kernel's // delivery of SIGSEGV after the RDTSC trap, so we do nothing // here and move on. } /** Return true iff |t->ip()| points at a RDTSC instruction. */ static const uint8_t rdtsc_insn[] = { 0x0f, 0x31 }; static bool is_ip_rdtsc(Task* t) { uint8_t insn[sizeof(rdtsc_insn)]; if (sizeof(insn) != t->read_bytes_fallible(t->ip().to_data_ptr(), sizeof(insn), insn)) { return false; } return !memcmp(insn, rdtsc_insn, sizeof(insn)); } /** * Return true if |t| was stopped because of a SIGSEGV resulting * from a rdtsc and |t| was updated appropriately, false otherwise. */ static bool try_handle_rdtsc(Task* t, siginfo_t* si) { ASSERT(t, si->si_signo == SIGSEGV); if (!is_ip_rdtsc(t)) { return false; } unsigned long long current_time = rdtsc(); Registers r = t->regs(); r.set_rdtsc_output(current_time); r.set_ip(r.ip() + sizeof(rdtsc_insn)); t->set_regs(r); t->push_event(Event(EV_SEGV_RDTSC, HAS_EXEC_INFO, t->arch())); LOG(debug) << " trapped for rdtsc: returning " << current_time; return true; } /** * Return true if |t| was stopped because of a SIGSEGV and we want to retry * the instruction after emulating MAP_GROWSDOWN. 
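 *
 * (Background, as a sketch: for a MAP_GROWSDOWN mapping the kernel
 * transparently extends the mapping downward when a fault hits just
 * below it. Since rr must record every change to the tracee's address
 * space, we perform that growth ourselves with an explicit mmap,
 * record the new region, and then let the faulting instruction retry.)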
*/
static bool try_grow_map(Task* t, siginfo_t* si) {
  ASSERT(t, si->si_signo == SIGSEGV);

  // Use kernel_abi to avoid odd inconsistencies between distros
  auto arch_si = reinterpret_cast<NativeArch::siginfo_t*>(si);
  auto addr = arch_si->_sifields._sigfault.si_addr_.rptr();

  auto maps = t->vm()->maps_starting_at(floor_page_size(addr));
  auto it = maps.begin();
  if (it == maps.end() || addr >= it->map.start() ||
      !(it->map.flags() & MAP_GROWSDOWN)) {
    return false;
  }

  auto new_start = floor_page_size(addr);
  static const uintptr_t grow_size = 0x10000;
  if (it->map.start().as_int() >= grow_size) {
    auto possible_new_start = std::min(new_start, it->map.start() - grow_size);
    auto earlier_maps = t->vm()->maps_starting_at(possible_new_start);
    if (earlier_maps.begin()->map.start() == it->map.start()) {
      // No intervening map
      new_start = possible_new_start;
    }
  }

  struct rlimit stack_limit;
  int ret = prlimit(t->tid, RLIMIT_STACK, NULL, &stack_limit);
  if (ret >= 0 && stack_limit.rlim_cur != RLIM_INFINITY) {
    new_start = std::max(new_start,
                         ceil_page_size(it->map.end() - stack_limit.rlim_cur));
    if (new_start > addr) {
      return false;
    }
  }

  {
    AutoRemoteSyscalls remote(t, AutoRemoteSyscalls::DISABLE_MEMORY_PARAMS);
    remote.infallible_mmap_syscall(
        new_start, it->map.start() - new_start, it->map.prot(),
        (it->map.flags() & ~MAP_GROWSDOWN) | MAP_ANONYMOUS, -1, 0);
  }
  KernelMapping km =
      t->vm()->map(new_start, it->map.start() - new_start, it->map.prot(),
                   it->map.flags() | MAP_ANONYMOUS, 0, string(),
                   KernelMapping::NO_DEVICE, KernelMapping::NO_INODE);
  t->trace_writer().write_mapped_region(km, km.fake_stat());
  // No need to flush syscallbuf here. It's safe to map these pages "early"
  // before they're really needed.
  t->record_event(Event(EV_GROW_MAP, NO_EXEC_INFO, t->arch()),
                  Task::DONT_FLUSH_SYSCALLBUF);
  t->push_event(Event::noop(t->arch()));
  LOG(debug) << " trapped for MAP_GROWSDOWN";
  return true;
}

void disarm_desched_event(Task* t) {
  if (ioctl(t->desched_fd, PERF_EVENT_IOC_DISABLE, 0)) {
    FATAL() << "Failed to disarm desched event";
  }
}

void arm_desched_event(Task* t) {
  if (ioctl(t->desched_fd, PERF_EVENT_IOC_ENABLE, 0)) {
    FATAL() << "Failed to arm desched event";
  }
}

/**
 * Return the event needing to be processed after this desched of |t|.
 * The tracee's execution may be advanced, and if so |regs| is updated
 * to the tracee's latest state.
 */
static void handle_desched_event(Task* t, const siginfo_t* si) {
  ASSERT(t, (SYSCALLBUF_DESCHED_SIGNAL == si->si_signo &&
             si->si_code == POLL_IN && si->si_fd == t->desched_fd_child))
      << "Tracee is using SIGPWR??? (code=" << si->si_code
      << ", fd=" << si->si_fd << ")";

  /* If the tracee isn't in the critical section where a desched
   * event is relevant, we can ignore it. See the long comments
   * in syscall_buffer.c.
   *
   * It's OK if the tracee is in the critical section for a
   * may-block syscall B, but this signal was delivered by an
   * event programmed by a previous may-block syscall A.
   *
   * If we're running in a signal handler inside an interrupted syscallbuf
   * system call, never do anything here. Syscall buffering is disabled and
   * the desched_signal_may_be_relevant was set by the outermost syscallbuf
   * invocation. */
  if (!t->syscallbuf_hdr->desched_signal_may_be_relevant ||
      t->running_inside_desched()) {
    LOG(debug) << " (not entering may-block syscall; resuming)";
    /* We have to disarm the event just in case the tracee
     * has cleared the relevancy flag, but not yet
     * disarmed the event itself. */
    disarm_desched_event(t);
    t->push_event(Event::noop(t->arch()));
    return;
  }

  /* TODO: how can signals interrupt us here?
  /* The desched event just fired.  That implies that the
   * arm-desched ioctl went into effect, and that the
   * disarm-desched syscall didn't take effect.  Since a signal
   * is pending for the tracee, then if the tracee was in a
   * syscall, linux has exited it with an -ERESTART* error code.
   * That means the tracee is about to (re-)enter either
   *
   *  1. buffered syscall
   *  2. disarm-desched ioctl syscall
   *
   * We can figure out which one by simply issuing a
   * ptrace(SYSCALL) and examining the tracee's registers.
   *
   * If the tracee enters the disarm-desched ioctl, it's going
   * to commit a record of the buffered syscall to the
   * syscallbuf, and we can safely send the tracee back on its
   * way, ignoring the desched completely.
   *
   * If it enters the buffered syscall, then the desched event
   * has served its purpose and we need to prepare the tracee to
   * be context-switched.
   *
   * An annoyance of the desched signal is that when the tracer
   * is descheduled in interval (C) above, we will normally (see
   * below) see *two* signals.  The current theory of what's
   * happening is
   *
   *  o child gets descheduled, bumps counter to i and schedules
   *    signal
   *  o signal notification "schedules" child, but it doesn't
   *    actually run any application code
   *  o child is being ptraced, so we "deschedule" child to
   *    notify parent and bump counter to i+1.  (The parent
   *    hasn't had a chance to clear the counter yet.)
   *  o another counter signal is generated, but signal is
   *    already pending so this one is queued
   *  o parent is notified and sees counter value i+1
   *  o parent stops delivery of first signal and disarms
   *    counter
   *  o second signal dequeued and delivered, notifying parent
   *    (counter is disarmed now, so no pseudo-desched possible
   *    here)
   *  o parent is notified and sees counter value i+1 again
   *  o parent stops delivery of second signal and we continue on
   *
   * So we "work around" this by the tracer expecting two signal
   * notifications, and silently discarding both.
   *
   * One really fun edge case is that sometimes the desched
   * signal will interrupt the arm-desched syscall itself.
   * Continuing to the next syscall boundary seems to restart
   * the arm-desched syscall, and advancing to the boundary
   * again exits it and we start receiving desched signals
   * again.
   *
   * That may be a kernel bug, but we handle it by just
   * continuing until we get past the arm-desched syscall *and*
   * stop seeing signals. */
  while (true) {
    // Prevent further desched notifications from firing
    // while we're advancing the tracee.  We're going to
    // leave it in a consistent state anyway, so the event
    // is no longer useful.  We have to do this in each
    // loop iteration because a restarted arm-desched
    // syscall may have re-armed the event.
    disarm_desched_event(t);

    t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_UNLIMITED_TICKS);

    int sig = t->stop_sig();
    if (STOPSIG_SYSCALL == sig) {
      if (t->is_arm_desched_event_syscall()) {
        continue;
      }
      break;
    }
    // Completely ignore spurious desched signals and
    // signals that aren't going to be delivered to the
    // tracee.
    //
    // Also ignore time-slice signals.  If the tracee ends
    // up at the disarm-desched ioctl, we'll reschedule it
    // with the ticks interrupt still programmed.  At worst,
    // the tracee will get an extra time-slice out of
    // this, on average, so we don't worry too much about
    // it.
    //
    // TODO: it's theoretically possible for this to
    // happen an unbounded number of consecutive times
    // and the tracee never be switched out.
if (SYSCALLBUF_DESCHED_SIGNAL == sig || PerfCounters::TIME_SLICE_SIGNAL == sig || t->is_sig_ignored(sig)) { LOG(debug) << " dropping ignored " << signal_name(sig); continue; } LOG(debug) << " stashing " << signal_name(sig); t->stash_sig(); } if (t->is_disarm_desched_event_syscall()) { LOG(debug) << " (at disarm-desched, so finished buffered syscall; resuming)"; t->push_event(Event::noop(t->arch())); return; } if (t->desched_rec()) { // We're already processing a desched. We probably reexecuted the // system call (e.g. because a signal was processed) and the syscall // blocked again. Carry on with the current desched. } else { /* This prevents the syscallbuf record counter from being * reset until we've finished guiding the tracee through this * interrupted call. We use the record counter for * assertions. */ t->delay_syscallbuf_reset = true; /* The tracee is (re-)entering the buffered syscall. Stash * away this breadcrumb so that we can figure out what syscall * the tracee was in, and how much "scratch" space it carved * off the syscallbuf, if needed. */ const struct syscallbuf_record* desched_rec = next_record(t->syscallbuf_hdr); t->push_event(DeschedEvent(desched_rec, t->arch())); int call = t->desched_rec()->syscallno; /* The descheduled syscall was interrupted by a signal, like * all other may-restart syscalls, with the exception that * this one has already been restarted (which we'll detect * back in the main loop). */ t->push_event(Event(interrupted, SyscallEvent(call, t->arch()))); SyscallEvent& ev = t->ev().Syscall(); ev.desched_rec = desched_rec; } SyscallEvent& ev = t->ev().Syscall(); ev.regs = t->regs(); /* For some syscalls (at least poll) but not all (at least not read), * repeated cont_syscall()s above of the same interrupted syscall * can set $orig_eax to 0 ... for unclear reasons. Fix that up here * otherwise we'll get a divergence during replay, which will not * encounter this problem. */ int call = t->desched_rec()->syscallno; ev.regs.set_original_syscallno(call); t->set_regs(ev.regs); ev.state = EXITING_SYSCALL; LOG(debug) << " resuming (and probably switching out) blocked `" << t->syscall_name(call) << "'"; } static bool is_safe_to_deliver_signal(Task* t) { struct syscallbuf_hdr* hdr = t->syscallbuf_hdr; if (!hdr) { /* Can't be in critical section because the lock * doesn't exist yet! */ return true; } if (!t->is_in_syscallbuf()) { /* The tracee is outside the syscallbuf code, * so in most cases can't possibly affect * syscallbuf critical sections. The * exception is signal handlers "re-entering" * desched'd syscalls, which are OK. */ return true; } if (t->is_in_traced_syscall()) { LOG(debug) << " tracee at traced syscallbuf syscall"; return true; } if (t->is_in_untraced_syscall() && t->desched_rec()) { LOG(debug) << " tracee interrupted by desched of " << t->syscall_name(t->desched_rec()->syscallno); return true; } // Our emulation of SYS_rrcall_notify_syscall_hook_exit clears this flag. hdr->notify_on_syscall_hook_exit = true; return false; } SignalHandled handle_signal(Task* t, siginfo_t* si) { LOG(debug) << t->tid << ": handling signal " << signal_name(si->si_signo) << " (pevent: " << t->ptrace_event() << ", event: " << t->ev(); /* We have to check for a desched event first, because for * those we *do not* want to (and cannot, most of the time) * step the tracee out of the syscallbuf code before * attempting to deliver the signal. 
 */
  if (SYSCALLBUF_DESCHED_SIGNAL == si->si_signo) {
    handle_desched_event(t, si);
    return SIGNAL_HANDLED;
  }

  if (!is_safe_to_deliver_signal(t)) {
    return DEFER_SIGNAL;
  }

  t->set_siginfo_for_synthetic_SIGCHLD(si);

  /* See if this signal occurred because of an rr implementation detail,
   * and fudge t appropriately. */
  switch (si->si_signo) {
    case SIGSEGV:
      if (try_handle_rdtsc(t, si) || try_grow_map(t, si)) {
        // When SIGSEGV is blocked, apparently the kernel has to do
        // some ninjutsu to raise the trap.  We see the SIGSEGV
        // bit in the "SigBlk" mask in /proc/status cleared, and if
        // there's a user handler the SIGSEGV bit in "SigCgt" is
        // cleared too.  That's perfectly fine, except that it's
        // unclear who's supposed to undo the signal-state munging.  A
        // legitimate argument can be made that the tracer is
        // responsible, so we go ahead and restore the old state.
        //
        // One could also argue that this is a kernel bug.  If so,
        // then this is a workaround that can be removed in the
        // future.
        //
        // If we don't restore the old state, at least firefox has
        // been observed to hang at delivery of SIGSEGV.  However, the
        // test written for this bug, fault_in_code_addr, doesn't hang
        // without the restore.
        if (t->is_sig_blocked(SIGSEGV)) {
          restore_sigsegv_state(t);
        }
        return SIGNAL_HANDLED;
      }
      break;

    case PerfCounters::TIME_SLICE_SIGNAL:
      t->push_event(Event(EV_SCHED, HAS_EXEC_INFO, t->arch()));
      return SIGNAL_HANDLED;
  }

  /* This signal was generated by the program or an external
   * source, record it normally. */

  if (t->emulate_ptrace_stop((si->si_signo << 8) | 0x7f,
                             SIGNAL_DELIVERY_STOP)) {
    t->save_ptrace_signal_siginfo(*si);
    // Record a SCHED event so that replay progresses the tracee to the
    // current point before we notify the tracer.
    t->push_event(Event(EV_SCHED, HAS_EXEC_INFO, t->arch()));
    t->record_current_event();
    t->pop_event(EV_SCHED);
    // ptracer has been notified, so don't deliver the signal now.
    // The signal won't be delivered for real until the ptracer calls
    // PTRACE_CONT with the signal number (which we don't support yet!).
    return SIGNAL_PTRACE_STOP;
  }

  t->push_event(SignalEvent(*si, t->arch()));
  return SIGNAL_HANDLED;
}
rr-4.1.0/src/record_signal.h000066400000000000000000000015631265436462100157300ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#ifndef RR_HANDLE_SIGNAL_H__
#define RR_HANDLE_SIGNAL_H__

#include <signal.h>

class Task;

const int SIGCHLD_SYNTHETIC = 0xbeadf00d;

void disarm_desched_event(Task* t);
void arm_desched_event(Task* t);

enum SignalHandled { SIGNAL_HANDLED, SIGNAL_PTRACE_STOP, DEFER_SIGNAL };

/**
 * Handle the given signal for |t|.
 * Returns SIGNAL_HANDLED if we handled the signal, SIGNAL_PTRACE_STOP if we
 * didn't handle the signal due to an emulated ptrace-stop, and DEFER_SIGNAL
 * if we can't handle the signal right now and should try calling
 * handle_signal again later in task execution.
 * Handling the signal means we either pushed a new signal event, pushed new
 * desched + syscall-interruption events, or did nothing (a no-op).
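 *
 * A minimal caller sketch (illustrative only; the real call sites live in
 * the record session code):
 *
 *   switch (handle_signal(t, &si)) {
 *     case SIGNAL_HANDLED:     break;                 // event(s) pushed
 *     case SIGNAL_PTRACE_STOP: break;                 // emulated ptracer notified
 *     case DEFER_SIGNAL:       t->stash_sig(); break; // retry later
 *   }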
*/ SignalHandled handle_signal(Task* t, siginfo_t* si); #endif /* RR_HANDLE_SIGNAL_H__ */ rr-4.1.0/src/record_syscall.cc000066400000000000000000003521731265436462100162710ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "ProcessSyscallRec" #include "record_syscall.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "preload/preload_interface.h" #include "AutoRemoteSyscalls.h" #include "drm.h" #include "Flags.h" #include "kernel_abi.h" #include "kernel_metadata.h" #include "kernel_supplement.h" #include "log.h" #include "RecordSession.h" #include "Scheduler.h" #include "task.h" #include "TraceStream.h" #include "util.h" using namespace std; using namespace rr; union _semun { int val; struct semid64_ds* buf; unsigned short int* array; struct seminfo* __buf; }; /* We can't include to get shmctl because it clashes with * linux/shm.h. */ static int _shmctl(int shmid, int cmd, shmid64_ds* buf) { if (sizeof(void*) == 4) { cmd |= IPC_64; } #ifdef SYS_shmctl return syscall(SYS_shmctl, shmid, cmd, buf); #else return syscall(SYS_ipc, SHMCTL, shmid, cmd, 0, buf); #endif } static int _semctl(int semid, int semnum, int cmd, _semun un_arg) { if (sizeof(void*) == 4) { cmd |= IPC_64; } #ifdef SYS_semctl return syscall(SYS_semctl, semid, semnum, cmd, un_arg); #else return syscall(SYS_ipc, SEMCTL, semid, semnum, cmd, &un_arg); #endif } /** * Modes used to register syscall memory parameter with TaskSyscallState. */ enum ArgMode { // Syscall memory parameter is an in-parameter only. // This is only important when we want to move the buffer to scratch memory // so we can modify it without making the modifications potentially visible // to user code. Otherwise, such parameters can be ignored. IN, // Syscall memory parameter is out-parameter only. OUT, // Syscall memory parameter is an in-out parameter. IN_OUT, // Syscall memory parameter is an in-out parameter but we must not use // scratch (e.g. for futexes, we must use the actual memory word). IN_OUT_NO_SCRATCH }; /** * Specifies how to determine the size of a syscall memory * parameter. There is usually an incoming size determined before the syscall * executes (which we need in order to allocate scratch memory), combined * with an optional final size taken from the syscall result or a specific * memory location after the syscall has executed. The minimum of the incoming * and final sizes is used, if both are present. */ struct ParamSize { ParamSize(size_t incoming_size = size_t(-1)) : incoming_size(incoming_size), from_syscall(false) {} /** * p points to a tracee location that is already initialized with a * "maximum buffer size" passed in by the tracee, and which will be filled * in with the size of the data by the kernel when the syscall exits. */ template static ParamSize from_initialized_mem(Task* t, remote_ptr p) { ParamSize r(p.is_null() ? size_t(0) : size_t(t->read_mem(p))); r.mem_ptr = p; r.read_size = sizeof(T); return r; } /** * p points to a tracee location which will be filled in with the size of * the data by the kernel when the syscall exits, but the location * is uninitialized before the syscall. 
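 * For example, prepare_recvmmsg() below registers each mmsghdr's msg_len
 * field this way: the kernel only fills msg_len in on exit, so there is
 * no meaningful incoming value to read.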
*/ template static ParamSize from_mem(remote_ptr p) { ParamSize r(size_t(-1)); r.mem_ptr = p; r.read_size = sizeof(T); return r; } /** * When the syscall exits, the syscall result will be of type T and contain * the size of the data. 'incoming_size', if present, is a bound on the size * of the data. */ template static ParamSize from_syscall_result(size_t incoming_size = size_t(-1)) { ParamSize r(incoming_size); r.from_syscall = true; r.read_size = sizeof(T); return r; } /** * Indicate that the size will be at most 'max'. */ ParamSize limit_size(size_t max) const { ParamSize r(*this); r.incoming_size = min(r.incoming_size, max); return r; } /** * Return true if 'other' takes its dynamic size from the same source as * this. * When multiple syscall memory parameters take their dynamic size from the * same source, the source size is distributed among them, with the first * registered parameter taking up to its max_size bytes, followed by the next, * etc. This lets us efficiently record iovec buffers. */ bool is_same_source(const ParamSize& other) const { return ((!mem_ptr.is_null() && other.mem_ptr == mem_ptr) || (from_syscall && other.from_syscall)) && (read_size == other.read_size); } /** * Compute the actual size after the syscall has executed. * 'already_consumed' bytes are subtracted from the syscall-result/ * memory-location part of the size. */ size_t eval(Task* t, size_t already_consumed) const; size_t incoming_size; /** If non-null, the size is limited by the value at this location after * the syscall. */ remote_ptr mem_ptr; /** Size of the value at mem_ptr or in the syscall result register. */ size_t read_size; /** If true, the size is limited by the value of the syscall result. */ bool from_syscall; }; size_t ParamSize::eval(Task* t, size_t already_consumed) const { size_t s = incoming_size; if (!mem_ptr.is_null()) { size_t mem_size; switch (read_size) { case 4: mem_size = t->read_mem(mem_ptr.cast()); break; case 8: mem_size = t->read_mem(mem_ptr.cast()); break; default: ASSERT(t, false) << "Unknown read_size"; return 0; } ASSERT(t, already_consumed <= mem_size); s = min(s, mem_size - already_consumed); } if (from_syscall) { size_t syscall_size = max(0, t->regs().syscall_result_signed()); switch (read_size) { case 4: syscall_size = uint32_t(syscall_size); break; case 8: syscall_size = uint64_t(syscall_size); break; default: ASSERT(t, false) << "Unknown read_size"; return 0; } ASSERT(t, already_consumed <= syscall_size); s = min(s, syscall_size - already_consumed); } ASSERT(t, s < size_t(-1)); return s; } /** * When tasks enter syscalls that may block and so must be * prepared for a context-switch, and the syscall params * include (in)outparams that point to buffers, we need to * redirect those arguments to scratch memory. This allows rr * to serialize execution of what may be multiple blocked * syscalls completing "simultaneously" (from rr's * perspective). After the syscall exits, we restore the data * saved in scratch memory to the original buffers. * * Then during replay, we simply restore the saved data to the * tracee's passed-in buffer args and continue on. * * This is implemented by having rec_prepare_syscall_arch set up * a record in param_list for syscall in-memory parameter (whether * "in" or "out"). Then done_preparing is called, which does the actual * scratch setup. process_syscall_results is called when the syscall is * done, to write back scratch results to the real parameters and * clean everything up. * * ... a fly in this ointment is may-block buffered syscalls. 
* If a task blocks in one of those, it will look like it just * entered a syscall that needs a scratch buffer. However, * it's too late at that point to fudge the syscall args, * because processing of the syscall has already begun in the * kernel. But that's OK: the syscallbuf code has already * swapped out the original buffer-pointers for pointers into * the syscallbuf (which acts as its own scratch memory). We * just have to worry about setting things up properly for * replay. * * The descheduled syscall will "abort" its commit into the * syscallbuf, so the outparam data won't actually be saved * there (and thus, won't be restored during replay). During * replay, we have to restore them like we restore the * non-buffered-syscall scratch data. This is done by recording * the relevant syscallbuf record data in rec_process_syscall_arch. */ struct TaskSyscallState { void init(Task* t) { if (preparation_done) { return; } this->t = t; scratch = t->scratch_ptr; } /** * Identify a syscall memory parameter whose address is in register 'arg' * with type T. * Returns a remote_ptr to the data in the child (before scratch relocation) * or null if parameters have already been prepared (the syscall is * resuming). */ template remote_ptr reg_parameter(int arg, ArgMode mode = OUT) { return reg_parameter(arg, sizeof(T), mode).cast(); } /** * Identify a syscall memory parameter whose address is in register 'arg' * with size 'size'. * Returns a remote_ptr to the data in the child (before scratch relocation) * or null if parameters have already been prepared (the syscall is * resuming). */ remote_ptr reg_parameter(int arg, const ParamSize& size, ArgMode mode = OUT); /** * Identify a syscall memory parameter whose address is in memory at * location 'addr_of_buf_ptr' with type T. * Returns a remote_ptr to the data in the child (before scratch relocation) * or null if parameters have already been prepared (the syscall is * resuming). * addr_of_buf_ptr must be in a buffer identified by some init_..._parameter * call. */ template remote_ptr mem_ptr_parameter(remote_ptr addr_of_buf_ptr, ArgMode mode = OUT) { return mem_ptr_parameter(addr_of_buf_ptr, sizeof(T), mode).cast(); } /** * Identify a syscall memory parameter whose address is in memory at * location 'addr_of_buf_ptr' with type T. * Returns a remote_ptr to the data in the child (before scratch relocation) * or null if parameters have already been prepared (the syscall is * resuming). * addr_of_buf_ptr must be in a buffer identified by some init_..._parameter * call. */ template remote_ptr mem_ptr_parameter_inferred( remote_ptr addr_of_buf_ptr, ArgMode mode = OUT) { remote_ptr p = mem_ptr_parameter(addr_of_buf_ptr, Ptr::referent_size(), mode); return p.cast(); } /** * Identify a syscall memory parameter whose address is in memory at * location 'addr_of_buf_ptr' with size 'size'. * Returns a remote_ptr to the data in the child (before scratch relocation) * or null if parameters have already been prepared (the syscall is * resuming). * addr_of_buf_ptr must be in a buffer identified by some init_..._parameter * call. 
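 *
 * For example, prepare_socketcall() below handles SYS_GETSOCKOPT by
 * registering the socketcall args struct with reg_parameter(2, IN) and
 * then registering the optval buffer through mem_ptr_parameter(), sized
 * by the optlen in-out parameter.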
*/ remote_ptr mem_ptr_parameter(remote_ptr addr_of_buf_ptr, const ParamSize& size, ArgMode mode = OUT); typedef void (*AfterSyscallAction)(Task* t); void after_syscall_action(AfterSyscallAction action) { after_syscall_actions.push_back(action); } void emulate_result(uint64_t result) { ASSERT(t, !preparation_done); ASSERT(t, !should_emulate_result); should_emulate_result = true; emulated_result = result; } /** * Internal method that takes 'ptr', an address within some memory parameter, * and relocates it to the parameter's location in scratch memory. */ remote_ptr relocate_pointer_to_scratch(remote_ptr ptr); /** * Internal method that takes the index of a MemoryParam and a vector * containing the actual sizes assigned to each param < param_index, and * computes the actual size to use for parameter param_index. */ size_t eval_param_size(size_t param_index, vector& actual_sizes); /** * Called when all memory parameters have been identified. If 'sw' is * ALLOW_SWITCH, sets up scratch memory and updates registers etc as * necessary. * If scratch can't be used for some reason, returns PREVENT_SWITCH, * otherwise returns 'sw'. */ Switchable done_preparing(Switchable sw); enum WriteBack { WRITE_BACK, NO_WRITE_BACK }; /** * Called when a syscall exits to copy results from scratch memory to their * original destinations, update registers, etc. */ void process_syscall_results(); /** * Upon successful syscall completion, each RestoreAndRecordScratch record * in param_list consumes num_bytes from the t->scratch_ptr * buffer, copying the data to remote_dest and recording the data at * remote_dest. If ptr_in_reg is greater than zero, updates the task's * ptr_in_reg register with 'remote_dest'. If ptr_in_memory is non-null, * updates the ptr_in_memory location with the value 'remote_dest'. */ struct MemoryParam { MemoryParam() : ptr_in_reg(0) {} remote_ptr dest; remote_ptr scratch; ParamSize num_bytes; remote_ptr ptr_in_memory; int ptr_in_reg; ArgMode mode; }; Task* t; vector param_list; /** Tracks the position in t's scratch_ptr buffer where we should allocate * the next scratch area. */ remote_ptr scratch; vector after_syscall_actions; std::unique_ptr exec_saved_event; Task* ptraced_tracee; /** Task created by clone()/fork(). Set when we get a PTRACE_EVENT_CLONE/FORK. */ Task* new_task; /** Saved syscall-entry registers, used by a couple of code paths that * modify the registers temporarily. */ std::unique_ptr syscall_entry_registers; /** When nonzero, syscall is expected to return the given errno and we should * die if it does not. This is set when we detect an error condition during * syscall-enter preparation. */ int expect_errno; /** When should_emulate_result is true, syscall result should be adjusted to * be emulated_result. */ bool should_emulate_result; uint64_t emulated_result; /** Records whether the syscall is switchable. Only valid when * preparation_done is true. */ Switchable switchable; /** Whether we should write back the syscall results from scratch. Only * valid when preparation_done is true. */ WriteBack write_back; /** When true, this syscall has already been prepared and should not * be set up again. */ bool preparation_done; /** When true, the scratch area is enabled, otherwise we're letting * syscall outputs be written directly to their destinations. * Only valid when preparation_done is true. 
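 * done_preparing() leaves this false when switching is prevented or no
 * memory parameters were registered; in that case outputs are recorded
 * directly from their real destinations.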
*/ bool scratch_enabled; /** Miscellaneous saved data that can be used by particular syscalls */ vector saved_data; TaskSyscallState() : t(nullptr), ptraced_tracee(nullptr), new_task(nullptr), expect_errno(0), should_emulate_result(false), preparation_done(false), scratch_enabled(false) {} }; static const Property syscall_state_property; void rec_set_syscall_new_task(Task* t, Task* new_task) { auto syscall_state = syscall_state_property.get(*t); ASSERT(t, syscall_state) << "new task created outside of syscall?"; ASSERT(t, is_clone_syscall(t->regs().original_syscallno(), t->arch()) || is_fork_syscall(t->regs().original_syscallno(), t->arch())); ASSERT(t, !syscall_state->new_task); syscall_state->new_task = new_task; } template static void set_remote_ptr_arch(Task* t, remote_ptr addr, remote_ptr value) { auto typed_addr = addr.cast(); t->write_mem(typed_addr, (typename Arch::unsigned_word)value.as_int()); } static void set_remote_ptr(Task* t, remote_ptr addr, remote_ptr value) { RR_ARCH_FUNCTION(set_remote_ptr_arch, t->arch(), t, addr, value); } template static remote_ptr get_remote_ptr_arch(Task* t, remote_ptr addr) { auto typed_addr = addr.cast(); auto old = t->read_mem(typed_addr); return remote_ptr(old); } static remote_ptr get_remote_ptr(Task* t, remote_ptr addr) { RR_ARCH_FUNCTION(get_remote_ptr_arch, t->arch(), t, addr); } static void align_scratch(remote_ptr* scratch, uintptr_t amount = 8) { *scratch = (scratch->as_int() + amount - 1) & ~(amount - 1); } remote_ptr TaskSyscallState::reg_parameter(int arg, const ParamSize& size, ArgMode mode) { if (preparation_done) { return remote_ptr(); } MemoryParam param; param.dest = t->regs().arg(arg); if (param.dest.is_null()) { return remote_ptr(); } param.num_bytes = size; param.mode = mode; if (mode != IN_OUT_NO_SCRATCH) { param.scratch = scratch; scratch += param.num_bytes.incoming_size; align_scratch(&scratch); param.ptr_in_reg = arg; } param_list.push_back(param); return param.dest; } remote_ptr TaskSyscallState::mem_ptr_parameter( remote_ptr addr_of_buf_ptr, const ParamSize& size, ArgMode mode) { if (preparation_done) { return remote_ptr(); } MemoryParam param; param.dest = get_remote_ptr(t, addr_of_buf_ptr); if (param.dest.is_null()) { return remote_ptr(); } param.num_bytes = size; param.mode = mode; if (mode != IN_OUT_NO_SCRATCH) { param.scratch = scratch; scratch += param.num_bytes.incoming_size; align_scratch(&scratch); param.ptr_in_memory = addr_of_buf_ptr; } param_list.push_back(param); return param.dest; } remote_ptr TaskSyscallState::relocate_pointer_to_scratch( remote_ptr ptr) { int num_relocations = 0; remote_ptr result; for (auto& param : param_list) { if (param.dest <= ptr && ptr < param.dest + param.num_bytes.incoming_size) { result = param.scratch + (ptr - param.dest); ++num_relocations; } } assert(num_relocations > 0 && "Pointer in non-scratch memory being updated to point to scratch?"); assert(num_relocations <= 1 && "Overlapping buffers containing relocated pointer?"); return result; } Switchable TaskSyscallState::done_preparing(Switchable sw) { if (preparation_done) { return switchable; } preparation_done = true; write_back = WRITE_BACK; ssize_t scratch_num_bytes = scratch - t->scratch_ptr; ASSERT(t, scratch_num_bytes >= 0); if (sw == ALLOW_SWITCH && scratch_num_bytes > t->scratch_size) { LOG(warn) << "`" << t->syscall_name(t->ev().Syscall().number) << "' needed a scratch buffer of size " << scratch_num_bytes << ", but only " << t->scratch_size << " was available. 
Disabling context switching: deadlock may follow."; switchable = PREVENT_SWITCH; } else { switchable = sw; } if (switchable == PREVENT_SWITCH || param_list.empty()) { return switchable; } scratch_enabled = true; // Step 1: Copy all IN/IN_OUT parameters to their scratch areas for (auto& param : param_list) { ASSERT(t, param.num_bytes.incoming_size < size_t(-1)); if (param.mode == IN_OUT || param.mode == IN) { // Initialize scratch buffer with input data t->remote_memcpy(param.scratch, param.dest, param.num_bytes.incoming_size); } } // Step 2: Update pointers in registers/memory to point to scratch areas Registers r = t->regs(); for (auto& param : param_list) { if (param.ptr_in_reg) { r.set_arg(param.ptr_in_reg, param.scratch.as_int()); } if (!param.ptr_in_memory.is_null()) { // Pointers being relocated must themselves be in scratch memory. // We don't want to modify non-scratch memory. Find the pointer's location // in scratch memory. auto p = relocate_pointer_to_scratch(param.ptr_in_memory); // Update pointer to point to scratch. // Note that this can only happen after step 1 is complete and all // parameter data has been copied to scratch memory. set_remote_ptr(t, p, param.scratch); } // If the number of bytes to record is coming from a memory location, // update that location to scratch. if (!param.num_bytes.mem_ptr.is_null()) { param.num_bytes.mem_ptr = relocate_pointer_to_scratch(param.num_bytes.mem_ptr); } } t->set_regs(r); return switchable; } size_t TaskSyscallState::eval_param_size(size_t i, vector& actual_sizes) { assert(actual_sizes.size() == i); size_t already_consumed = 0; for (size_t j = 0; j < i; ++j) { if (param_list[j].num_bytes.is_same_source(param_list[i].num_bytes)) { already_consumed += actual_sizes[j]; } } size_t size = param_list[i].num_bytes.eval(t, already_consumed); actual_sizes.push_back(size); return size; } void TaskSyscallState::process_syscall_results() { ASSERT(t, preparation_done); // XXX what's the best way to handle failed syscalls? Currently we just // record everything as if it succeeded. That handles failed syscalls that // wrote partial results, but doesn't handle syscalls that failed with // EFAULT. 
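  // Illustrative example of the size-distribution logic in
  // eval_param_size() above: if a readv()-style call registered two
  // buffers of 4 and 8 bytes that share the syscall result as their size
  // source, and the syscall returned 10, the first buffer is assigned
  // min(4, 10) = 4 bytes and the second min(8, 10 - 4) = 6 bytes.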
vector actual_sizes; if (scratch_enabled) { size_t scratch_num_bytes = scratch - t->scratch_ptr; auto data = t->read_mem(t->scratch_ptr.cast(), scratch_num_bytes); Registers r = t->regs(); // Step 1: compute actual sizes of all buffers and copy outputs // from scratch back to their origin for (size_t i = 0; i < param_list.size(); ++i) { auto& param = param_list[i]; size_t size = eval_param_size(i, actual_sizes); if (write_back == WRITE_BACK && (param.mode == IN_OUT || param.mode == OUT)) { const uint8_t* d = data.data() + (param.scratch - t->scratch_ptr); t->write_bytes_helper(param.dest, size, d); } } bool memory_cleaned_up = false; // Step 2: restore modified in-memory pointers and registers for (size_t i = 0; i < param_list.size(); ++i) { auto& param = param_list[i]; if (param.ptr_in_reg) { r.set_arg(param.ptr_in_reg, param.dest.as_int()); } if (!param.ptr_in_memory.is_null()) { memory_cleaned_up = true; set_remote_ptr(t, param.ptr_in_memory, param.dest); } } if (write_back == WRITE_BACK) { // Step 3: record all output memory areas for (size_t i = 0; i < param_list.size(); ++i) { auto& param = param_list[i]; size_t size = actual_sizes[i]; if (param.mode == IN_OUT_NO_SCRATCH) { t->record_remote(param.dest, size); } else if (param.mode == IN_OUT || param.mode == OUT) { // If pointers in memory were fixed up in step 2, then record // from tracee memory to ensure we record such fixes. Otherwise we // can record from our local data. // XXX This optimization can be improved if necessary... if (memory_cleaned_up) { t->record_remote(param.dest, size); } else { const uint8_t* d = data.data() + (param.scratch - t->scratch_ptr); t->record_local(param.dest, size, d); } } } } t->set_regs(r); } else { for (size_t i = 0; i < param_list.size(); ++i) { auto& param = param_list[i]; size_t size = eval_param_size(i, actual_sizes); t->record_remote(param.dest, size); } } if (should_emulate_result) { Registers r = t->regs(); r.set_syscall_result(emulated_result); t->set_regs(r); } for (auto& action : after_syscall_actions) { action(t); } } template static void prepare_recvmsg(Task* t, TaskSyscallState& syscall_state, remote_ptr msgp, const ParamSize& io_size) { auto namelen_ptr = REMOTE_PTR_FIELD(msgp, msg_namelen); syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(msgp, msg_name), ParamSize::from_initialized_mem(t, namelen_ptr)); auto msg = t->read_mem(msgp); remote_ptr iovecsp_void = syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(msgp, msg_iov), sizeof(typename Arch::iovec) * msg.msg_iovlen, IN); auto iovecsp = iovecsp_void.cast(); auto iovecs = t->read_mem(iovecsp, msg.msg_iovlen); for (size_t i = 0; i < msg.msg_iovlen; ++i) { syscall_state.mem_ptr_parameter(REMOTE_PTR_FIELD(iovecsp + i, iov_base), io_size.limit_size(iovecs[i].iov_len)); } auto controllen_ptr = REMOTE_PTR_FIELD(msgp, msg_controllen); syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(msgp, msg_control), ParamSize::from_initialized_mem(t, controllen_ptr)); } template static void prepare_recvmmsg(Task* t, TaskSyscallState& syscall_state, remote_ptr mmsgp, unsigned int vlen) { for (unsigned int i = 0; i < vlen; ++i) { auto msgp = mmsgp + i; prepare_recvmsg(t, syscall_state, REMOTE_PTR_FIELD(msgp, msg_hdr), ParamSize::from_mem(REMOTE_PTR_FIELD(msgp, msg_len))); } } template static Switchable prepare_socketcall(Task* t, TaskSyscallState& syscall_state) { /* int socketcall(int call, unsigned long *args) { * long a[6]; * copy_from_user(a,args); * sys_recv(a0, (void __user *)a1, a[2], a[3]); * } * * (from 
http://lxr.linux.no/#linux+v3.6.3/net/socket.c#L2354) */ switch ((int)t->regs().arg1_signed()) { /* int socket(int domain, int type, int protocol); */ case SYS_SOCKET: /* int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen); */ case SYS_CONNECT: /* int bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen); */ case SYS_BIND: /* int listen(int sockfd, int backlog) */ case SYS_LISTEN: /* ssize_t send(int sockfd, const void *buf, size_t len, int flags) */ case SYS_SEND: /* ssize_t sendto(int sockfd, const void *buf, size_t len, int flags, const * struct sockaddr *dest_addr, socklen_t addrlen); */ case SYS_SENDTO: /* int setsockopt(int sockfd, int level, int optname, const void *optval, * socklen_t optlen); */ case SYS_SETSOCKOPT: /* int shutdown(int socket, int how) */ case SYS_SHUTDOWN: break; /* int getsockopt(int sockfd, int level, int optname, const void *optval, * socklen_t* optlen); */ case SYS_GETSOCKOPT: { auto argsp = syscall_state.reg_parameter(2, IN); auto optlen_ptr = syscall_state.mem_ptr_parameter_inferred( REMOTE_PTR_FIELD(argsp, optlen), IN_OUT); syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(argsp, optval), ParamSize::from_initialized_mem(t, optlen_ptr)); break; } /* int socketpair(int domain, int type, int protocol, int sv[2]); * * values returned in sv */ case SYS_SOCKETPAIR: { auto argsp = syscall_state.reg_parameter(2, IN); syscall_state.mem_ptr_parameter(REMOTE_PTR_FIELD(argsp, sv), sizeof(int) * 2); break; } /* int getpeername(int sockfd, struct sockaddr *addr, socklen_t *addrlen); */ case SYS_GETPEERNAME: /* int getsockname(int sockfd, struct sockaddr *addr, socklen_t *addrlen); */ case SYS_GETSOCKNAME: { auto argsp = syscall_state.reg_parameter(2, IN); auto addrlen_ptr = syscall_state.mem_ptr_parameter_inferred( REMOTE_PTR_FIELD(argsp, addrlen), IN_OUT); syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(argsp, addr), ParamSize::from_initialized_mem(t, addrlen_ptr)); break; } /* ssize_t recv([int sockfd, void *buf, size_t len, int flags]) */ case SYS_RECV: { auto argsp = syscall_state.reg_parameter(2, IN); auto args = t->read_mem(argsp); syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(argsp, buf), ParamSize::from_syscall_result(args.len)); return ALLOW_SWITCH; } /* int accept([int sockfd, struct sockaddr *addr, socklen_t *addrlen]) */ case SYS_ACCEPT: { auto argsp = syscall_state.reg_parameter(2, IN); auto addrlen_ptr = syscall_state.mem_ptr_parameter_inferred( REMOTE_PTR_FIELD(argsp, addrlen), IN_OUT); syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(argsp, addr), ParamSize::from_initialized_mem(t, addrlen_ptr)); return ALLOW_SWITCH; } /* int accept4([int sockfd, struct sockaddr *addr, socklen_t *addrlen, int * flags]) */ case SYS_ACCEPT4: { auto argsp = syscall_state.reg_parameter(2, IN); auto addrlen_ptr = syscall_state.mem_ptr_parameter_inferred( REMOTE_PTR_FIELD(argsp, addrlen), IN_OUT); syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(argsp, addr), ParamSize::from_initialized_mem(t, addrlen_ptr)); return ALLOW_SWITCH; } case SYS_RECVFROM: { auto argsp = syscall_state.reg_parameter(2, IN); auto args = t->read_mem(argsp); syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(argsp, buf), ParamSize::from_syscall_result(args.len)); auto addrlen_ptr = syscall_state.mem_ptr_parameter_inferred( REMOTE_PTR_FIELD(argsp, addrlen), IN_OUT); syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(argsp, src_addr), ParamSize::from_initialized_mem(t, addrlen_ptr)); return ALLOW_SWITCH; } case SYS_RECVMSG: { auto argsp = syscall_state.reg_parameter(2, 
IN); auto msgp = syscall_state.mem_ptr_parameter_inferred( REMOTE_PTR_FIELD(argsp, msg), IN_OUT); prepare_recvmsg( t, syscall_state, msgp, ParamSize::from_syscall_result()); auto args = t->read_mem(argsp); if (!(args.flags & MSG_DONTWAIT)) { return ALLOW_SWITCH; } break; } case SYS_RECVMMSG: { auto argsp = syscall_state.reg_parameter(2, IN); auto args = t->read_mem(argsp); remote_ptr mmsgp_void = syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(argsp, msgvec), sizeof(typename Arch::mmsghdr) * args.vlen, IN_OUT); auto mmsgp = mmsgp_void.cast(); prepare_recvmmsg(t, syscall_state, mmsgp, args.vlen); if (!(args.flags & MSG_DONTWAIT)) { return ALLOW_SWITCH; } break; } /* ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags) */ case SYS_SENDMSG: { auto argsp = remote_ptr(t->regs().arg2()); auto args = t->read_mem(argsp); if (!(args.flags & MSG_DONTWAIT)) { return ALLOW_SWITCH; } break; } case SYS_SENDMMSG: { auto argsp = syscall_state.reg_parameter(2, IN); auto args = t->read_mem(argsp); syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(argsp, msgvec), sizeof(typename Arch::mmsghdr) * args.vlen, IN_OUT); if (!(args.flags & MSG_DONTWAIT)) { return ALLOW_SWITCH; } break; } default: syscall_state.expect_errno = EINVAL; break; } return PREVENT_SWITCH; } template static Switchable prepare_msgctl(Task* t, TaskSyscallState& syscall_state, int cmd, int ptr_reg) { switch (cmd) { case IPC_STAT: case MSG_STAT: syscall_state.reg_parameter(ptr_reg); break; case IPC_INFO: case MSG_INFO: syscall_state.reg_parameter(ptr_reg); break; case IPC_SET: case IPC_RMID: break; default: syscall_state.expect_errno = EINVAL; break; } return PREVENT_SWITCH; } template static Switchable prepare_shmctl(Task* t, TaskSyscallState& syscall_state, int cmd, int ptr_reg) { switch (cmd) { case IPC_SET: case IPC_RMID: case SHM_LOCK: case SHM_UNLOCK: break; case IPC_STAT: case SHM_STAT: syscall_state.reg_parameter(ptr_reg); break; case IPC_INFO: syscall_state.reg_parameter(ptr_reg); break; case SHM_INFO: syscall_state.reg_parameter(ptr_reg); break; default: syscall_state.expect_errno = EINVAL; break; } return PREVENT_SWITCH; } enum SemctlDereference { DEREFERENCE, USE_DIRECTLY }; template static Switchable prepare_semctl(Task* t, TaskSyscallState& syscall_state, int semid, int cmd, int ptr_reg, SemctlDereference deref) { switch (cmd) { case IPC_SET: case IPC_RMID: case GETNCNT: case GETPID: case GETVAL: case GETZCNT: case SETALL: case SETVAL: break; case IPC_STAT: case SEM_STAT: if (deref == DEREFERENCE) { syscall_state.mem_ptr_parameter( syscall_state.reg_parameter(ptr_reg)); } else { syscall_state.reg_parameter(ptr_reg); } break; case IPC_INFO: case SEM_INFO: if (deref == DEREFERENCE) { syscall_state.mem_ptr_parameter( syscall_state.reg_parameter(ptr_reg)); } else { syscall_state.reg_parameter(ptr_reg); } break; case GETALL: { semid64_ds ds; _semun un_arg; un_arg.buf = &ds; int ret = _semctl(semid, 0, IPC_STAT, un_arg); ASSERT(t, ret == 0); ParamSize size = sizeof(unsigned short) * ds.sem_nsems; if (deref == DEREFERENCE) { syscall_state.mem_ptr_parameter( syscall_state.reg_parameter(ptr_reg), size); } else { syscall_state.reg_parameter(ptr_reg, size); } break; } default: syscall_state.expect_errno = EINVAL; break; } return PREVENT_SWITCH; } /** * A change has been made to file 'fd' in task t. If the file has been mmapped * somewhere in t's address space, record the changes. * We check for matching files by comparing file names. This may not be * reliable but hopefully it's good enough for the cases where we need this. 
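 * (Name matching can miss files that were renamed or unlinked after
 * being mapped, and can spuriously match a different file later mapped
 * under the same name.)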
* This doesn't currently handle shared mappings very well. A file mapped * shared in multiple locations will be recorded once per location. * This doesn't handle mappings of the file into other address spaces. */ static void record_file_change(Task* t, int fd, uint64_t offset, uint64_t length) { string file_name = t->file_name_of_fd(fd); for (auto m : t->vm()->maps()) { if (m.map.fsname() == file_name) { uint64_t start = max(offset, uint64_t(m.map.file_offset_bytes())); uint64_t end = min(offset + length, uint64_t(m.map.file_offset_bytes()) + m.map.size()); if (start < end) { t->record_remote(m.map.start() + (start - m.map.file_offset_bytes()), end - start); } } }; } template static void record_v4l2_buffer_contents(Task* t) { remote_ptr bufp = t->regs().arg3(); auto buf = t->read_mem(bufp); switch (buf.memory) { case V4L2_MEMORY_MMAP: record_file_change(t, (int)t->regs().arg1_signed(), buf.m.offset, buf.length); return; default: ASSERT(t, false) << "Unhandled V4L2 memory type " << buf.memory; return; } } static void record_page_below_stack_ptr(Task* t) { /* Record.the page above the top of |t|'s stack. The SIOC* ioctls * have been observed to write beyond the end of tracees' stacks, as * if they had allocated scratch space for themselves. All we can do * for now is try to record the scratch data. */ t->record_remote(t->regs().sp() - page_size(), page_size()); } #define IOCTL_MASK_SIZE(v) ((v) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) template static Switchable prepare_ioctl(Task* t, TaskSyscallState& syscall_state) { int request = (int)t->regs().arg2_signed(); int type = _IOC_TYPE(request); int nr = _IOC_NR(request); int dir = _IOC_DIR(request); int size = _IOC_SIZE(request); LOG(debug) << "handling ioctl(" << HEX(request) << "): type:" << HEX(type) << " nr:" << HEX(nr) << " dir:" << HEX(dir) << " size:" << size; ASSERT(t, !t->is_desched_event_syscall()) << "Failed to skip past desched ioctl()"; /* Some ioctl()s are irregular and don't follow the _IOC() * conventions. Special case them here. */ switch (request) { case SIOCETHTOOL: { auto ifrp = syscall_state.reg_parameter(3, IN); syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(ifrp, ifr_ifru.ifru_data)); syscall_state.after_syscall_action(record_page_below_stack_ptr); return PREVENT_SWITCH; } case SIOCGIFCONF: { auto ifconfp = syscall_state.reg_parameter(3, IN_OUT); syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(ifconfp, ifc_ifcu.ifcu_buf), ParamSize::from_initialized_mem(t, REMOTE_PTR_FIELD(ifconfp, ifc_len))); syscall_state.after_syscall_action(record_page_below_stack_ptr); return PREVENT_SWITCH; } case SIOCGIFADDR: case SIOCGIFHWADDR: case SIOCGIFFLAGS: case SIOCGIFINDEX: case SIOCGIFMTU: case SIOCGIFNAME: syscall_state.reg_parameter(3); syscall_state.after_syscall_action(record_page_below_stack_ptr); return PREVENT_SWITCH; case SIOCGIWRATE: // SIOCGIWRATE hasn't been observed to write beyond // tracees' stacks, but we record a stack page here // just in case the behavior is driver-dependent. 
syscall_state.reg_parameter(3); syscall_state.after_syscall_action(record_page_below_stack_ptr); return PREVENT_SWITCH; case TCGETS: syscall_state.reg_parameter(3); return PREVENT_SWITCH; case TIOCINQ: syscall_state.reg_parameter(3); return PREVENT_SWITCH; case TIOCGWINSZ: syscall_state.reg_parameter(3); return PREVENT_SWITCH; case TIOCGPGRP: syscall_state.reg_parameter(3); return PREVENT_SWITCH; case SNDRV_CTL_IOCTL_PVERSION: syscall_state.reg_parameter(3); return PREVENT_SWITCH; case SNDRV_CTL_IOCTL_CARD_INFO: syscall_state.reg_parameter(3); return PREVENT_SWITCH; } /* In ioctl language, "_IOC_READ" means "outparam". Both * READ and WRITE can be set for inout params. */ if (!(_IOC_READ & dir)) { switch (IOCTL_MASK_SIZE(request)) { case IOCTL_MASK_SIZE(FIOCLEX): case IOCTL_MASK_SIZE(FIONCLEX): return PREVENT_SWITCH; } /* If the kernel isn't going to write any data back to * us, we hope and pray that the result of the ioctl * (observable to the tracee) is deterministic. * We're also assuming it doesn't block. * XXX this is far too risky! Many ioctls use irregular ioctl codes * that do not have the _IOC_READ bit set but actually do write to * user-space! */ LOG(debug) << " (presumed ignorable ioctl, nothing to do)"; return PREVENT_SWITCH; } /* The following are thought to be "regular" ioctls, the * processing of which is only known to (observably) write to * the bytes in the structure passed to the kernel. So all we * need is to record |size| bytes. * Since the size may vary across architectures we mask it out here to check * only the type + number. */ switch (IOCTL_MASK_SIZE(request)) { case IOCTL_MASK_SIZE(VIDIOC_QUERYCAP): case IOCTL_MASK_SIZE(VIDIOC_ENUM_FMT): case IOCTL_MASK_SIZE(VIDIOC_G_FMT): case IOCTL_MASK_SIZE(VIDIOC_S_FMT): case IOCTL_MASK_SIZE(VIDIOC_TRY_FMT): case IOCTL_MASK_SIZE(VIDIOC_G_PARM): case IOCTL_MASK_SIZE(VIDIOC_S_PARM): case IOCTL_MASK_SIZE(VIDIOC_REQBUFS): case IOCTL_MASK_SIZE(VIDIOC_QUERYBUF): case IOCTL_MASK_SIZE(VIDIOC_QBUF): case IOCTL_MASK_SIZE(VIDIOC_G_CTRL): case IOCTL_MASK_SIZE(VIDIOC_S_CTRL): case IOCTL_MASK_SIZE(VFAT_IOCTL_READDIR_BOTH): syscall_state.reg_parameter(3, size, IN_OUT); return PREVENT_SWITCH; case IOCTL_MASK_SIZE(TIOCGPTN): syscall_state.reg_parameter(3, size); return PREVENT_SWITCH; } /* These ioctls are mostly regular but require additional recording. */ switch (IOCTL_MASK_SIZE(request)) { case IOCTL_MASK_SIZE(VIDIOC_DQBUF): { if (size == sizeof(typename Arch::v4l2_buffer)) { syscall_state.reg_parameter(3, size, IN_OUT); syscall_state.after_syscall_action(record_v4l2_buffer_contents); // VIDIOC_DQBUF can block. It can't if the fd was opened O_NONBLOCK, // but we don't try to determine that. // Note that we're exposed to potential race conditions here because // VIDIOC_DQBUF (blocking or not) assumes the driver has filled // the mmapped data region at some point since the buffer was queued // with VIDIOC_QBUF, and we don't/can't know exactly when that // happened. Replay could fail if this thread or another thread reads // the contents of mmapped contents queued with the driver. 
return ALLOW_SWITCH; } } } syscall_state.expect_errno = EINVAL; return PREVENT_SWITCH; } static bool maybe_emulate_wait(Task* t, TaskSyscallState& syscall_state) { for (Task* child : t->emulated_ptrace_tracees) { if (t->is_waiting_for_ptrace(child) && child->emulated_ptrace_stop_code) { syscall_state.ptraced_tracee = child; return true; } } return false; } static void maybe_pause_instead_of_waiting(Task* t) { if (t->in_wait_type != WAIT_TYPE_PID) { return; } Task* child = t->session().find_task(t->in_wait_pid); if (!child || !t->is_waiting_for_ptrace(child) || t->is_waiting_for(child)) { return; } // OK, t is waiting for a ptrace child by tid, but since t is not really // ptracing child, entering a real wait syscall will not actually wait for // the child, so the kernel may error out with ECHILD (non-ptracers can't // wait on specific threads of another process, or for non-child processes). // To avoid this problem, we'll replace the wait syscall with a pause() // syscall. // It would be nice if we didn't have to do this, but I can't see a better // way. Registers r = t->regs(); r.set_original_syscallno(syscall_number_for_pause(t->arch())); t->set_regs(r); } static Task* verify_ptrace_target(Task* tracer, TaskSyscallState& syscall_state, pid_t pid) { Task* tracee = tracer->session().find_task(pid); if (!tracee || tracee->emulated_ptracer != tracer || tracee->emulated_stop_type == NOT_STOPPED) { syscall_state.emulate_result(-ESRCH); return nullptr; } return tracee; } static void prepare_ptrace_cont(Task* tracee, int sig) { if (sig) { siginfo_t si = tracee->take_ptrace_signal_siginfo(sig); tracee->push_event(SignalEvent(si, tracee->arch())); } tracee->emulated_stop_type = NOT_STOPPED; } static uint64_t widen_buffer_unsigned(const void* buf, size_t size) { switch (size) { case 1: return *reinterpret_cast(buf); case 2: return *reinterpret_cast(buf); case 4: return *reinterpret_cast(buf); case 8: return *reinterpret_cast(buf); default: assert(0 && "Unsupported size"); return 0; } } static int64_t widen_buffer_signed(const void* buf, size_t size) { switch (size) { case 1: return *reinterpret_cast(buf); case 2: return *reinterpret_cast(buf); case 4: return *reinterpret_cast(buf); case 8: return *reinterpret_cast(buf); default: assert(0 && "Unsupported size"); return 0; } } static uint64_t path_inode_number(const char* path) { struct stat st; int ret = stat(path, &st); assert(ret == 0); return st.st_ino; } static bool is_same_namespace(const char* name, pid_t tid1, pid_t tid2) { char path1[PATH_MAX]; char path2[PATH_MAX]; sprintf(path1, "/proc/%d/ns/%s", tid1, name); sprintf(path2, "/proc/%d/ns/%s", tid2, name); return path_inode_number(path1) == path_inode_number(path2); } template static Switchable prepare_ptrace(Task* t, TaskSyscallState& syscall_state) { syscall_state.syscall_entry_registers = unique_ptr(new Registers(t->regs())); pid_t pid = (pid_t)t->regs().arg2_signed(); bool emulate = true; switch ((int)t->regs().arg1_signed()) { case PTRACE_ATTACH: { // To simplify things, require that a ptracer be in the same pid // namespace as rr itself. I.e., tracee tasks sandboxed in a pid // namespace can't use ptrace. This is normally a requirement of // sandboxes anyway. // This could be supported, but would require some work to translate // rr's pids to/from the ptracer's pid namespace. ASSERT(t, is_same_namespace("pid", t->tid, getpid())); Task* tracee = t->session().find_task(pid); if (!tracee) { // XXX This prevents a tracee from attaching to a process which isn't // under rr's control. 
We could support this but it would complicate // things. syscall_state.emulate_result(-ESRCH); break; } if (tracee->emulated_ptracer || tracee->tgid() == t->tgid()) { syscall_state.emulate_result(-EPERM); break; } tracee->set_emulated_ptracer(t); syscall_state.emulate_result(0); if (tracee->emulated_stop_type == NOT_STOPPED) { // Send SIGSTOP to this specific thread. Otherwise the kernel might // deliver SIGSTOP to some other thread of the process, and we won't // generate any ptrace event if that thread isn't being ptraced. tracee->tgkill(SIGSTOP); } else { ASSERT(tracee, tracee->emulated_stop_type == GROUP_STOP); // tracee is already stopped because of a group-stop signal. // Sending a SIGSTOP won't work, but we don't need to. tracee->force_emulate_ptrace_stop((SIGSTOP << 8) | 0x7f, SIGNAL_DELIVERY_STOP); } break; } case PTRACE_GETREGS: { Task* tracee = verify_ptrace_target(t, syscall_state, pid); if (tracee) { auto data = syscall_state.reg_parameter(4); auto regs = tracee->regs().get_ptrace_for_arch(Arch::arch()); ASSERT(t, regs.size() == data.referent_size()); t->write_bytes_helper(data, regs.size(), regs.data()); syscall_state.emulate_result(0); } break; } case PTRACE_GETFPREGS: { Task* tracee = verify_ptrace_target(t, syscall_state, pid); if (tracee) { auto data = syscall_state.reg_parameter(4); auto regs = tracee->extra_regs().get_user_fpregs_struct(Arch::arch()); ASSERT(t, regs.size() == data.referent_size()); t->write_bytes_helper(data, regs.size(), regs.data()); syscall_state.emulate_result(0); } break; } case PTRACE_GETFPXREGS: { if (Arch::arch() != x86) { // GETFPXREGS is x86-32 only syscall_state.expect_errno = EIO; break; } Task* tracee = verify_ptrace_target(t, syscall_state, pid); if (tracee) { auto data = syscall_state.reg_parameter(4); auto regs = tracee->extra_regs().get_user_fpxregs_struct(); t->write_mem(data, regs); syscall_state.emulate_result(0); } break; } case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: { Task* tracee = verify_ptrace_target(t, syscall_state, pid); if (tracee) { // The actual syscall returns the data via the 'data' out-parameter. // The behavior of returning the data as the system call result is // provided by the glibc wrapper. auto datap = syscall_state.reg_parameter(4); remote_ptr addr = t->regs().arg3(); bool ok = true; auto v = tracee->read_mem(addr, &ok); if (ok) { t->write_mem(datap, v); syscall_state.emulate_result(0); } else { syscall_state.emulate_result(-EIO); } } break; } case PTRACE_POKETEXT: case PTRACE_POKEDATA: { Task* tracee = verify_ptrace_target(t, syscall_state, pid); if (tracee) { remote_ptr addr = t->regs().arg3(); typename Arch::unsigned_word data = t->regs().arg4(); bool ok = true; tracee->write_mem(addr, data, &ok); if (ok) { // Normally we'd call tracee->record_local to record the written // data. We don't do that here because the write needs to be // performed in a different address space to the current task's. // Instead we don't record anything other than the usual syscall // event, and replay_syscall performs the write. syscall_state.emulate_result(0); } else { syscall_state.emulate_result(-EIO); } } break; } case PTRACE_PEEKUSER: { Task* tracee = verify_ptrace_target(t, syscall_state, pid); if (tracee) { // The actual syscall returns the data via the 'data' out-parameter. // The behavior of returning the data as the system call result is // provided by the glibc wrapper. 
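      // Roughly, the wrapper does something like this (sketch, not rr
      // code):
      //   long res;
      //   long ret = syscall(SYS_ptrace, PTRACE_PEEKUSER, pid, addr, &res);
      //   return ret < 0 ? ret : res;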
size_t addr = t->regs().arg3(); typename Arch::unsigned_word data; if ((addr & (sizeof(data) - 1)) || addr >= sizeof(typename Arch::user)) { syscall_state.emulate_result(-EIO); break; } auto datap = syscall_state.reg_parameter(4); if (addr < sizeof(typename Arch::user_regs_struct)) { uint8_t buf[Registers::MAX_SIZE]; bool defined; size_t size = tracee->regs().read_register_by_user_offset(buf, addr, &defined); if (defined) { // For unclear reasons, all 32-bit user_regs_struct members are // signed while all 64-bit user_regs_struct members are unsigned. if (Arch::arch() == x86) { data = widen_buffer_signed(buf, size); } else { data = widen_buffer_unsigned(buf, size); } } else { data = 0; } } else if (addr >= offsetof(typename Arch::user, u_debugreg[0]) && addr < offsetof(typename Arch::user, u_debugreg[8])) { size_t regno = (addr - offsetof(typename Arch::user, u_debugreg[0])) / sizeof(data); data = tracee->get_debug_reg(regno); } else { data = 0; } t->write_mem(datap, data); syscall_state.emulate_result(0); } break; } case PTRACE_CONT: { Task* tracee = verify_ptrace_target(t, syscall_state, pid); if (tracee) { prepare_ptrace_cont(tracee, t->regs().arg4()); syscall_state.emulate_result(0); } break; } case PTRACE_DETACH: { Task* tracee = verify_ptrace_target(t, syscall_state, pid); if (tracee) { prepare_ptrace_cont(tracee, t->regs().arg4()); tracee->set_emulated_ptracer(nullptr); syscall_state.emulate_result(0); } break; } default: syscall_state.expect_errno = EIO; emulate = false; break; } if (emulate) { Registers r = t->regs(); r.set_arg1((intptr_t)-1); t->set_regs(r); } return PREVENT_SWITCH; } /** * At thread exit time, undo the work that init_buffers() did. * * Call this when the tracee has already entered SYS_exit. The * tracee will be returned at a state in which it has entered (or * re-entered) SYS_exit. */ static void destroy_buffers(Task* t) { // NB: we have to pay all this complexity here because glibc // makes its SYS_exit call through an inline int $0x80 insn, // instead of going through the vdso. There may be a deep // reason for why it does that, but if it starts going through // the vdso in the future, this code can be eliminated in // favor of a *much* simpler vsyscall SYS_exit hook in the // preload lib. Registers exit_regs = t->regs(); ASSERT(t, is_exit_syscall(exit_regs.original_syscallno(), t->arch())) << "Tracee should have been at exit, but instead at " << t->syscall_name(exit_regs.original_syscallno()); // The tracee is at the entry to SYS_exit, but hasn't started // the call yet. We can't directly start injecting syscalls // because the tracee is still in the kernel. And obviously, // if we finish the SYS_exit syscall, the tracee isn't around // anymore. // // So hijack this SYS_exit call and rewrite it into a harmless // one that we can exit successfully, SYS_gettid here (though // that choice is arbitrary). exit_regs.set_original_syscallno(syscall_number_for_gettid(t->arch())); t->set_regs(exit_regs); // This exits the hijacked SYS_gettid. Now the tracee is // ready to do our bidding. t->advance_syscall(); // Restore these regs to what they would have been just before // the tracee trapped at SYS_exit. When we've finished // cleanup, we'll restart the SYS_exit call. 
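  // Clearing original_syscallno and rewinding the ip by one
  // syscall-instruction length (below) makes the resumed tracee re-execute
  // the syscall instruction from scratch, so the kernel sees a fresh
  // SYS_exit entry.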
exit_regs.set_original_syscallno(-1); exit_regs.set_syscallno(syscall_number_for_exit(t->arch())); exit_regs.set_ip(exit_regs.ip() - syscall_instruction_length(t->arch())); ASSERT(t, is_at_syscall_instruction(t, exit_regs.ip())) << "Tracee should have entered through int $0x80."; // Do the actual buffer and fd cleanup. t->destroy_buffers(); // Restart the SYS_exit call. t->set_regs(exit_regs); t->advance_syscall(); // XXX a signal might be received during the above, and stashed, and then // lost because we exited. But I don't really see that there's anything we // can do to prevent such a race :-(. ASSERT(t, !t->has_stashed_sig()); } template static Switchable rec_prepare_syscall_arch(Task* t, TaskSyscallState& syscall_state) { int syscallno = t->ev().Syscall().number; if (t->desched_rec()) { /* |t| was descheduled while in a buffered syscall. We don't * use scratch memory for the call, because the syscallbuf itself * is serving that purpose. More importantly, we *can't* set up * scratch for |t|, because it's already in the syscall. Instead, we will * record the syscallbuf memory in rec_process_syscall_arch. */ return ALLOW_SWITCH; } if (syscallno < 0) { // Invalid syscall. Don't let it accidentally match a // syscall number below that's for an undefined syscall. syscall_state.expect_errno = ENOSYS; return PREVENT_SWITCH; } switch (syscallno) { // All the regular syscalls are handled here. #include "SyscallRecordCase.generated" case Arch::splice: { syscall_state.reg_parameter(2, IN_OUT); syscall_state.reg_parameter(4, IN_OUT); return ALLOW_SWITCH; } case Arch::sendfile: { syscall_state.reg_parameter(3, IN_OUT); return ALLOW_SWITCH; } case Arch::sendfile64: { syscall_state.reg_parameter(3, IN_OUT); return ALLOW_SWITCH; } case Arch::capget: { auto hdr = t->read_mem( syscall_state.reg_parameter( 1, IN_OUT)); int struct_count; switch (hdr.version) { case _LINUX_CAPABILITY_VERSION_1: struct_count = _LINUX_CAPABILITY_U32S_1; break; case _LINUX_CAPABILITY_VERSION_2: struct_count = _LINUX_CAPABILITY_U32S_2; break; case _LINUX_CAPABILITY_VERSION_3: struct_count = _LINUX_CAPABILITY_U32S_3; break; default: struct_count = 0; break; } if (struct_count > 0) { syscall_state.reg_parameter( 2, sizeof(typename Arch::__user_cap_data_struct) * struct_count, OUT); } return PREVENT_SWITCH; } case Arch::clone: { syscall_state.syscall_entry_registers = unique_ptr(new Registers(t->regs())); unsigned long flags = t->regs().arg1(); if (flags & CLONE_VFORK) { Registers r = t->regs(); r.set_arg1(flags & ~CLONE_VFORK); t->set_regs(r); } if (flags & CLONE_UNTRACED) { Registers r = t->regs(); // We can't let tracees clone untraced tasks, // because they can create nondeterminism that // we can't replay. So unset the UNTRACED bit // and then cover our tracks on exit from // clone(). r.set_arg1(flags & ~CLONE_UNTRACED); t->set_regs(r); } return PREVENT_SWITCH; } case Arch::exit: t->stable_exit = true; if (t->task_group()->task_set().size() == 1) { t->task_group()->exit_code = (int)t->regs().arg1(); } destroy_buffers(t); return PREVENT_SWITCH; case Arch::exit_group: if (t->task_group()->task_set().size() == 1) { t->stable_exit = true; } t->task_group()->exit_code = (int)t->regs().arg1(); return PREVENT_SWITCH; case Arch::execve: { vector cmd_line; remote_ptr argv = t->regs().arg2(); while (true) { auto p = t->read_mem(argv); if (!p) { break; } cmd_line.push_back(t->read_c_str(p)); argv++; } // Save the event. We can't record it here because the exec might fail. 
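      // (The argv walk above mirrors what the kernel will do: follow the
      // pointer array until its NULL terminator, reading each C string out
      // of tracee memory.)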
string raw_filename = t->read_c_str(t->regs().arg1()); syscall_state.exec_saved_event = unique_ptr( new TraceTaskEvent(t->tid, raw_filename, cmd_line)); return PREVENT_SWITCH; } case Arch::fcntl: case Arch::fcntl64: syscall_state.syscall_entry_registers = unique_ptr(new Registers(t->regs())); switch ((int)t->regs().arg2_signed()) { case Arch::DUPFD: case Arch::DUPFD_CLOEXEC: case Arch::GETFD: case Arch::GETFL: case Arch::SETFL: case Arch::SETLK: case Arch::SETLK64: case Arch::SETOWN: case Arch::SETOWN_EX: case Arch::GETSIG: case Arch::SETSIG: case Arch::ADD_SEALS: break; case Arch::SETFD: if (!t->fd_table()->allow_close((int)t->regs().arg1())) { // Don't let tracee set FD_CLOEXEC on this fd. Disable the syscall, // but emulate a successful return. Registers r = t->regs(); r.set_arg1(-1); t->set_regs(r); syscall_state.emulate_result(0); } break; case Arch::GETLK: syscall_state.reg_parameter(3, IN_OUT); break; case Arch::GETLK64: // flock and flock64 better be different on 32-bit architectures, // but on 64-bit architectures, it's OK if they're the same. static_assert( sizeof(typename Arch::flock) < sizeof(typename Arch::flock64) || Arch::elfclass == ELFCLASS64, "struct flock64 not declared differently from struct flock"); syscall_state.reg_parameter(3, IN_OUT); break; case Arch::GETOWN_EX: syscall_state.reg_parameter(3); break; case Arch::SETLKW: case Arch::SETLKW64: // SETLKW blocks, but doesn't write any // outparam data to the |struct flock| // argument, so no need for scratch. return ALLOW_SWITCH; default: // Unknown command should trigger EINVAL. syscall_state.expect_errno = EINVAL; break; } return PREVENT_SWITCH; /* int futex(int *uaddr, int op, int val, const struct timespec *timeout, * int *uaddr2, int val3); * futex parameters are in-out but they can't be moved to scratch * addresses. */ case Arch::futex: switch ((int)t->regs().arg2_signed() & FUTEX_CMD_MASK) { case FUTEX_WAIT: case FUTEX_WAIT_BITSET: syscall_state.reg_parameter(1, IN_OUT_NO_SCRATCH); return ALLOW_SWITCH; case FUTEX_CMP_REQUEUE: case FUTEX_WAKE_OP: syscall_state.reg_parameter(1, IN_OUT_NO_SCRATCH); syscall_state.reg_parameter(5, IN_OUT_NO_SCRATCH); break; case FUTEX_WAKE: syscall_state.reg_parameter(1, IN_OUT_NO_SCRATCH); break; default: syscall_state.expect_errno = EINVAL; break; } return PREVENT_SWITCH; case Arch::getrandom: syscall_state.reg_parameter( 1, ParamSize::from_syscall_result((size_t)t->regs().arg2())); return (GRND_NONBLOCK & t->regs().arg3()) ? PREVENT_SWITCH : ALLOW_SWITCH; case Arch::set_thread_area: syscall_state.reg_parameter(1, IN_OUT); return PREVENT_SWITCH; case Arch::ipc: switch ((int)t->regs().arg1_signed()) { case MSGGET: case SHMDT: case SHMGET: case SEMGET: break; case MSGCTL: { int cmd = (int)t->regs().arg3_signed() & ~IPC_64; return prepare_msgctl(t, syscall_state, cmd, 5); } case MSGSND: case SEMOP: case SEMTIMEDOP: return ALLOW_SWITCH; case MSGRCV: { size_t msgsize = t->regs().arg3(); auto kluge_args = syscall_state.reg_parameter(5, IN); syscall_state.mem_ptr_parameter(REMOTE_PTR_FIELD(kluge_args, msgbuf), sizeof(typename Arch::signed_long) + msgsize); return ALLOW_SWITCH; } case SHMAT: { // Insane legacy feature: ipc SHMAT returns its pointer via an // in-memory out parameter. 
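// (That is, the legacy calling convention is roughly:
//    unsigned long raddr;
//    ipc(SHMAT, shmid, shmflg, (unsigned long)&raddr, shmaddr);
//    // on success the kernel stores the attach address in raddr
//  which is why syscall argument 4 is registered as an out-parameter
//  below.)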
syscall_state.reg_parameter(4); return PREVENT_SWITCH; } case SHMCTL: { int cmd = (int)t->regs().arg3_signed() & ~IPC_64; return prepare_shmctl(t, syscall_state, cmd, 5); } case SEMCTL: { int cmd = (int)t->regs().arg4_signed() & ~IPC_64; return prepare_semctl(t, syscall_state, (int)t->regs().arg2_signed(), cmd, 5, DEREFERENCE); } default: syscall_state.expect_errno = EINVAL; break; } return PREVENT_SWITCH; case Arch::msgctl: return prepare_msgctl(t, syscall_state, (int)t->regs().arg2_signed(), 3); case Arch::msgrcv: { size_t msgsize = t->regs().arg3(); syscall_state.reg_parameter(2, sizeof(typename Arch::signed_long) + msgsize); return ALLOW_SWITCH; } case Arch::msgsnd: case Arch::semop: case Arch::semtimedop: return ALLOW_SWITCH; case Arch::socketcall: return prepare_socketcall(t, syscall_state); case Arch::select: case Arch::_newselect: if (syscallno == Arch::select && Arch::select_semantics == Arch::SelectStructArguments) { auto argsp = syscall_state.reg_parameter(1, IN); syscall_state.mem_ptr_parameter_inferred( REMOTE_PTR_FIELD(argsp, read_fds), IN_OUT); syscall_state.mem_ptr_parameter_inferred( REMOTE_PTR_FIELD(argsp, write_fds), IN_OUT); syscall_state.mem_ptr_parameter_inferred( REMOTE_PTR_FIELD(argsp, except_fds), IN_OUT); syscall_state.mem_ptr_parameter_inferred( REMOTE_PTR_FIELD(argsp, timeout), IN_OUT); } else { syscall_state.reg_parameter(2, IN_OUT); syscall_state.reg_parameter(3, IN_OUT); syscall_state.reg_parameter(4, IN_OUT); syscall_state.reg_parameter(5, IN_OUT); } return ALLOW_SWITCH; case Arch::pselect6: syscall_state.reg_parameter(2, IN_OUT); syscall_state.reg_parameter(3, IN_OUT); syscall_state.reg_parameter(4, IN_OUT); syscall_state.reg_parameter(5, IN_OUT); return ALLOW_SWITCH; case Arch::recvfrom: { syscall_state.reg_parameter( 2, ParamSize::from_syscall_result( t->regs().arg3())); auto addrlen_ptr = syscall_state.reg_parameter(6, IN_OUT); syscall_state.reg_parameter( 5, ParamSize::from_initialized_mem(t, addrlen_ptr)); return ALLOW_SWITCH; } case Arch::recvmsg: { auto msgp = syscall_state.reg_parameter(2, IN_OUT); prepare_recvmsg( t, syscall_state, msgp, ParamSize::from_syscall_result()); if (!((int)t->regs().arg3() & MSG_DONTWAIT)) { return ALLOW_SWITCH; } return PREVENT_SWITCH; } case Arch::recvmmsg: { auto vlen = (unsigned int)t->regs().arg3(); auto mmsgp = syscall_state.reg_parameter(2, sizeof(typename Arch::mmsghdr) * vlen, IN_OUT) .cast(); prepare_recvmmsg(t, syscall_state, mmsgp, vlen); if (!((unsigned int)t->regs().arg4() & MSG_DONTWAIT)) { return ALLOW_SWITCH; } return PREVENT_SWITCH; } case Arch::sendmsg: if (!((unsigned int)t->regs().arg3() & MSG_DONTWAIT)) { return ALLOW_SWITCH; } return PREVENT_SWITCH; case Arch::sendmmsg: { auto vlen = (unsigned int)t->regs().arg3(); syscall_state.reg_parameter(2, sizeof(typename Arch::mmsghdr) * vlen, IN_OUT); if (!((unsigned int)t->regs().arg4() & MSG_DONTWAIT)) { return ALLOW_SWITCH; } return PREVENT_SWITCH; } case Arch::getsockname: case Arch::getpeername: { auto addrlen_ptr = syscall_state.reg_parameter(3, IN_OUT); syscall_state.reg_parameter( 2, ParamSize::from_initialized_mem(t, addrlen_ptr)); return PREVENT_SWITCH; } case Arch::getsockopt: { auto optlen_ptr = syscall_state.reg_parameter(5, IN_OUT); syscall_state.reg_parameter( 4, ParamSize::from_initialized_mem(t, optlen_ptr)); return PREVENT_SWITCH; } case Arch::pread64: /* ssize_t read(int fd, void *buf, size_t count); */ case Arch::read: syscall_state.reg_parameter( 2, ParamSize::from_syscall_result( (size_t)t->regs().arg3())); return ALLOW_SWITCH; case 
Arch::accept: case Arch::accept4: { auto addrlen_ptr = syscall_state.reg_parameter(3, IN_OUT); syscall_state.reg_parameter( 2, ParamSize::from_initialized_mem(t, addrlen_ptr)); return ALLOW_SWITCH; } case Arch::getcwd: { syscall_state.reg_parameter( 1, ParamSize::from_syscall_result( (size_t)t->regs().arg2())); return PREVENT_SWITCH; } case Arch::getdents: case Arch::getdents64: { syscall_state.reg_parameter(2, ParamSize::from_syscall_result( (unsigned int)t->regs().arg3())); return PREVENT_SWITCH; } case Arch::readlink: { syscall_state.reg_parameter( 2, ParamSize::from_syscall_result( (size_t)t->regs().arg3())); return PREVENT_SWITCH; } case Arch::readlinkat: { syscall_state.reg_parameter( 3, ParamSize::from_syscall_result( (size_t)t->regs().arg4())); return PREVENT_SWITCH; } case Arch::getgroups: { // We could record a little less data by restricting the recorded data // to the syscall result * sizeof(Arch::legacy_gid_t), but that would // require more infrastructure and it's not worth worrying about. syscall_state.reg_parameter(2, (int)t->regs().arg1_signed() * sizeof(typename Arch::legacy_gid_t)); return PREVENT_SWITCH; } case Arch::getgroups32: { // We could record a little less data by restricting the recorded data // to the syscall result * sizeof(Arch::gid_t), but that would // require more infrastructure and it's not worth worrying about. syscall_state.reg_parameter(2, (int)t->regs().arg1_signed() * sizeof(typename Arch::gid_t)); return PREVENT_SWITCH; } case Arch::write: case Arch::writev: { int fd = (int)t->regs().arg1_signed(); return t->fd_table()->will_write(t, fd); } /* ssize_t readv(int fd, const struct iovec *iov, int iovcnt); */ case Arch::readv: /* ssize_t preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset); */ case Arch::preadv: { int iovcnt = (int)t->regs().arg3_signed(); remote_ptr iovecsp_void = syscall_state.reg_parameter( 2, sizeof(typename Arch::iovec) * iovcnt, IN); auto iovecsp = iovecsp_void.cast(); auto iovecs = t->read_mem(iovecsp, iovcnt); ParamSize io_size = ParamSize::from_syscall_result(); for (int i = 0; i < iovcnt; ++i) { syscall_state.mem_ptr_parameter(REMOTE_PTR_FIELD(iovecsp + i, iov_base), io_size.limit_size(iovecs[i].iov_len)); } return ALLOW_SWITCH; } /* pid_t waitpid(pid_t pid, int *status, int options); */ /* pid_t wait4(pid_t pid, int *status, int options, struct rusage * *rusage); */ case Arch::waitpid: case Arch::wait4: { syscall_state.syscall_entry_registers = unique_ptr(new Registers(t->regs())); syscall_state.reg_parameter(2, IN_OUT); if (syscallno == Arch::wait4) { syscall_state.reg_parameter(4); } pid_t pid = (pid_t)t->regs().arg1_signed(); if (pid < -1) { t->in_wait_type = WAIT_TYPE_PGID; t->in_wait_pid = -pid; } else if (pid == -1) { t->in_wait_type = WAIT_TYPE_ANY; } else if (pid == 0) { t->in_wait_type = WAIT_TYPE_SAME_PGID; } else { t->in_wait_type = WAIT_TYPE_PID; t->in_wait_pid = pid; } if (maybe_emulate_wait(t, syscall_state)) { Registers r = t->regs(); // Set options to an invalid value to force syscall to fail r.set_arg3(0xffffffff); t->set_regs(r); return PREVENT_SWITCH; } maybe_pause_instead_of_waiting(t); return ALLOW_SWITCH; } case Arch::waitid: { syscall_state.syscall_entry_registers = unique_ptr(new Registers(t->regs())); syscall_state.reg_parameter(3, IN_OUT); t->in_wait_pid = (id_t)t->regs().arg2(); switch ((idtype_t)t->regs().arg1()) { case P_ALL: t->in_wait_type = WAIT_TYPE_ANY; break; case P_PID: t->in_wait_type = WAIT_TYPE_PID; break; case P_PGID: t->in_wait_type = WAIT_TYPE_PGID; break; default: 
syscall_state.expect_errno = EINVAL; break; } if (maybe_emulate_wait(t, syscall_state)) { Registers r = t->regs(); // Set options to an invalid value to force syscall to fail r.set_arg4(0xffffffff); t->set_regs(r); return PREVENT_SWITCH; } maybe_pause_instead_of_waiting(t); return ALLOW_SWITCH; } case Arch::setpriority: // The syscall might fail due to insufficient // permissions (e.g. while trying to decrease the nice value // while not root). // We'll choose to honor the new value anyway since we'd like // to be able to test configurations where a child thread // has a lower nice value than its parent, which requires // lowering the child's nice value. if ((int)t->regs().arg1_signed() == PRIO_PROCESS) { Task* target = (int)t->regs().arg2_signed() ? t->session().find_task((int)t->regs().arg2_signed()) : t; if (target) { LOG(debug) << "Setting nice value for tid " << t->tid << " to " << t->regs().arg3(); target->record_session().scheduler().update_task_priority( target, (int)t->regs().arg3_signed()); } } // Allow switching so we can switch to a lower priority task immediately return ALLOW_SWITCH; case Arch::pause: return ALLOW_SWITCH; /* int poll(struct pollfd *fds, nfds_t nfds, int timeout) */ /* int ppoll(struct pollfd *fds, nfds_t nfds, * const struct timespec *timeout_ts, * const sigset_t *sigmask); */ case Arch::poll: case Arch::ppoll: { auto nfds = (nfds_t)t->regs().arg2(); syscall_state.reg_parameter(1, sizeof(typename Arch::pollfd) * nfds, IN_OUT); return ALLOW_SWITCH; } case Arch::close: syscall_state.syscall_entry_registers = unique_ptr<Registers>(new Registers(t->regs())); if (!t->fd_table()->allow_close((int)t->regs().arg1())) { // Don't let processes close this fd. Abort with EBADF by setting // oldfd to -1, as if the fd is already closed. Registers r = t->regs(); r.set_arg1(intptr_t(-1)); t->set_regs(r); } return PREVENT_SWITCH; case Arch::dup2: case Arch::dup3: syscall_state.syscall_entry_registers = unique_ptr<Registers>(new Registers(t->regs())); if (!t->fd_table()->allow_close((int)t->regs().arg2())) { // Don't let processes dup over this fd. Abort with EBADF by setting // oldfd to -1. Registers r = t->regs(); r.set_arg1(intptr_t(-1)); t->set_regs(r); } return PREVENT_SWITCH; /* int prctl(int option, unsigned long arg2, unsigned long arg3, unsigned * long arg4, unsigned long arg5); */ case Arch::prctl: syscall_state.syscall_entry_registers = unique_ptr<Registers>(new Registers(t->regs())); switch ((int)t->regs().arg1_signed()) { case PR_GET_ENDIAN: case PR_GET_FPEMU: case PR_GET_FPEXC: case PR_GET_PDEATHSIG: case PR_GET_UNALIGN: syscall_state.reg_parameter(2); break; case PR_GET_KEEPCAPS: case PR_GET_NO_NEW_PRIVS: case PR_GET_TIMERSLACK: case PR_MCE_KILL: case PR_MCE_KILL_GET: case PR_SET_KEEPCAPS: case PR_SET_PDEATHSIG: case PR_SET_TIMERSLACK: break; case PR_SET_DUMPABLE: if (t->regs().arg2() == 0) { // Don't let processes make themselves undumpable. If a process // becomes undumpable, calling perf_event_open on it fails. Registers r = t->regs(); r.set_arg1(intptr_t(-1)); t->set_regs(r); syscall_state.emulate_result(0); t->task_group()->dumpable = false; } else if (t->regs().arg2() == 1) { t->task_group()->dumpable = true; } break; case PR_GET_DUMPABLE: syscall_state.emulate_result(t->task_group()->dumpable); break; case PR_GET_SECCOMP: syscall_state.emulate_result(t->prctl_seccomp_status); break; case PR_GET_TSC: { // Prevent the actual GET_TSC call. We force-return PR_TSC_ENABLE.
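// The "nullify and emulate" pattern used here (and in several other
// cases in this file) is, in sketch form:
//    Registers r = t->regs();
//    r.set_arg1(intptr_t(-1));        // make the real syscall a no-op failure
//    t->set_regs(r);
//    syscall_state.emulate_result(0); // ...but report success to the tracee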
Registers r = t->regs(); r.set_arg1(intptr_t(-1)); t->set_regs(r); syscall_state.emulate_result(0); t->write_mem(syscall_state.reg_parameter(2, IN_OUT_NO_SCRATCH), PR_TSC_ENABLE); break; } case PR_GET_NAME: syscall_state.reg_parameter(2, 16); break; case PR_SET_NAME: t->update_prname(t->regs().arg2()); break; case PR_SET_NO_NEW_PRIVS: if ((unsigned long)t->regs().arg2() != 1) { syscall_state.expect_errno = EINVAL; } break; case PR_SET_SECCOMP: // Allow all known seccomp calls. We must allow the seccomp call // that rr triggers when spawning the initial tracee. switch ((unsigned long)t->regs().arg2()) { case SECCOMP_MODE_STRICT: break; case SECCOMP_MODE_FILTER: { // If we're bootstrapping then this must be rr's own syscall // filter, so just install it normally now. if (t->session().can_validate()) { // Prevent the actual prctl call. We'll fix this up afterwards. Registers r = t->regs(); r.set_arg1(intptr_t(-1)); t->set_regs(r); } break; } default: syscall_state.expect_errno = EINVAL; break; } break; case PR_SET_PTRACER: { // Prevent any PR_SET_PTRACER call, but pretend it succeeded, since // we don't want any interference with our ptracing. Registers r = t->regs(); r.set_arg1(intptr_t(-1)); t->set_regs(r); syscall_state.emulate_result(0); break; } default: syscall_state.expect_errno = EINVAL; break; } return PREVENT_SWITCH; case Arch::arch_prctl: switch ((int)t->regs().arg1_signed()) { case ARCH_SET_FS: case ARCH_SET_GS: break; case ARCH_GET_FS: case ARCH_GET_GS: syscall_state.reg_parameter(2); break; default: syscall_state.expect_errno = EINVAL; break; } return PREVENT_SWITCH; case Arch::ioctl: return prepare_ioctl(t, syscall_state); case Arch::_sysctl: { auto argsp = syscall_state.reg_parameter(1, IN); auto oldlenp = syscall_state.mem_ptr_parameter_inferred( REMOTE_PTR_FIELD(argsp, oldlenp), IN_OUT); syscall_state.mem_ptr_parameter( REMOTE_PTR_FIELD(argsp, oldval), ParamSize::from_initialized_mem(t, oldlenp)); return PREVENT_SWITCH; } case Arch::quotactl: switch (t->regs().arg1() >> SUBCMDSHIFT) { case Q_GETQUOTA: syscall_state.reg_parameter(4); break; case Q_GETINFO: syscall_state.reg_parameter(4); break; case Q_GETFMT: syscall_state.reg_parameter(4); break; case Q_SETQUOTA: FATAL() << "Trying to set disk quota usage, this may interfere with " "rr recording"; // not reached case Q_QUOTAON: case Q_QUOTAOFF: case Q_SETINFO: case Q_SYNC: break; default: syscall_state.expect_errno = EINVAL; break; } return PREVENT_SWITCH; /* int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int * timeout); */ case Arch::epoll_wait: syscall_state.reg_parameter(2, sizeof(typename Arch::epoll_event) * t->regs().arg3_signed()); return ALLOW_SWITCH; /* The following two syscalls enable context switching not for * liveness/correctness reasons, but rather because if we * didn't context-switch away, rr might end up busy-waiting * needlessly. In addition, albeit far less likely, the * client program may have carefully optimized its own context * switching and we should take the hint. */ /* int nanosleep(const struct timespec *req, struct timespec *rem); */ case Arch::nanosleep: syscall_state.reg_parameter(2); return ALLOW_SWITCH; case Arch::sched_yield: // Force |t| to be context-switched if another thread // of equal or higher priority is available. We set // the counter to INT_MAX / 2 because various other // irrelevant events intervening between now and // scheduling may increment t's event counter, and we // don't want it to overflow. 
t->succ_event_counter = numeric_limits::max() / 2; // We're just pretending that t is blocked. The next // time its scheduling slot opens up, it's OK to // blocking-waitpid on t to see its status change. t->pseudo_blocked = true; t->record_session().scheduler().schedule_one_round_robin(t); return ALLOW_SWITCH; case Arch::rt_sigpending: syscall_state.reg_parameter(1, (size_t)t->regs().arg2()); return PREVENT_SWITCH; case Arch::rt_sigtimedwait: syscall_state.reg_parameter(2); return ALLOW_SWITCH; case Arch::rt_sigsuspend: case Arch::sigsuspend: t->sigsuspend_blocked_sigs = unique_ptr( new sig_set_t(t->read_mem(remote_ptr(t->regs().arg1())))); return ALLOW_SWITCH; case Arch::rt_sigprocmask: case Arch::sigprocmask: { syscall_state.reg_parameter(3); remote_ptr setp = t->regs().arg2(); if (!setp.is_null()) { auto sig_set = t->read_mem(setp); syscall_state.saved_data.resize(sizeof(sig_set)); memcpy(syscall_state.saved_data.data(), &sig_set, sizeof(sig_set)); // Don't let the tracee block TIME_SLICE_SIGNAL or // SYSCALLBUF_DESCHED_SIGNAL. sig_set &= ~(uint64_t(1) << (PerfCounters::TIME_SLICE_SIGNAL - 1)) & ~(uint64_t(1) << (SYSCALLBUF_DESCHED_SIGNAL - 1)); t->write_mem(setp, sig_set); } return PREVENT_SWITCH; } case Arch::getxattr: case Arch::lgetxattr: case Arch::fgetxattr: syscall_state.reg_parameter( 3, ParamSize::from_syscall_result(t->regs().arg4())); return PREVENT_SWITCH; case Arch::sched_setaffinity: { syscall_state.syscall_entry_registers = unique_ptr(new Registers(t->regs())); // Ignore all sched_setaffinity syscalls. They might interfere // with our own affinity settings. Registers r = t->regs(); // Set arg1 to an invalid PID to ensure this syscall is ignored. r.set_arg1(-1); t->set_regs(r); syscall_state.emulate_result(0); return PREVENT_SWITCH; } case Arch::sched_getaffinity: syscall_state.reg_parameter(3, ParamSize::from_syscall_result( (unsigned int)t->regs().arg2())); return PREVENT_SWITCH; case Arch::ptrace: return prepare_ptrace(t, syscall_state); case Arch::vfork: { Registers r = t->regs(); r.set_original_syscallno(Arch::fork); t->set_regs(r); return PREVENT_SWITCH; } case Arch::mincore: syscall_state.reg_parameter(3, (t->regs().arg2() + page_size() - 1) / page_size()); return PREVENT_SWITCH; case Arch::shmctl: return prepare_shmctl(t, syscall_state, (int)t->regs().arg2_signed(), 3); case Arch::semctl: return prepare_semctl( t, syscall_state, (int)t->regs().arg1_signed(), (int)t->regs().arg3_signed(), 4, USE_DIRECTLY); case Arch::seccomp: syscall_state.syscall_entry_registers = unique_ptr(new Registers(t->regs())); switch ((unsigned int)t->regs().arg1()) { case SECCOMP_SET_MODE_STRICT: break; case SECCOMP_SET_MODE_FILTER: { // Prevent the actual seccomp call. We'll fix this up afterwards. Registers r = t->regs(); r.set_arg1(intptr_t(-1)); t->set_regs(r); break; } default: syscall_state.expect_errno = EINVAL; break; } return PREVENT_SWITCH; case Arch::madvise: switch ((int)t->regs().arg3()) { case MADV_NORMAL: case MADV_RANDOM: case MADV_SEQUENTIAL: case MADV_WILLNEED: case MADV_DONTNEED: case MADV_REMOVE: case MADV_DONTFORK: case MADV_DOFORK: case MADV_SOFT_OFFLINE: case MADV_HWPOISON: case MADV_MERGEABLE: case MADV_UNMERGEABLE: case MADV_HUGEPAGE: case MADV_NOHUGEPAGE: case MADV_DONTDUMP: case MADV_DODUMP: break; default: syscall_state.expect_errno = EINVAL; } return PREVENT_SWITCH; case Arch::personality: switch ((int)t->regs().arg1()) { case PER_LINUX: // The default personality requires no handling. 
break; case 0xffffffff: // A special argument that only returns the existing personality. break; default: syscall_state.expect_errno = EINVAL; } return PREVENT_SWITCH; case Arch::mmap: syscall_state.syscall_entry_registers = unique_ptr(new Registers(t->regs())); switch (Arch::mmap_semantics) { case Arch::StructArguments: { auto args = t->read_mem( remote_ptr(t->regs().arg1())); // XXX fix this ASSERT(t, !(args.flags & MAP_GROWSDOWN)); break; } case Arch::RegisterArguments: { Registers r = t->regs(); r.set_arg4(r.arg4_signed() & ~MAP_GROWSDOWN); t->set_regs(r); break; } } return PREVENT_SWITCH; case Arch::mmap2: { syscall_state.syscall_entry_registers = unique_ptr(new Registers(t->regs())); Registers r = t->regs(); r.set_arg4(r.arg4_signed() & ~MAP_GROWSDOWN); t->set_regs(r); return PREVENT_SWITCH; } case Arch::mprotect: syscall_state.syscall_entry_registers = unique_ptr(new Registers(t->regs())); // Since we're stripping MAP_GROWSDOWN from kernel mmap calls, we need // to implement PROT_GROWSDOWN ourselves. t->vm()->fixup_mprotect_growsdown_parameters(t); return PREVENT_SWITCH; case Arch::brk: case Arch::munmap: case Arch::rrcall_init_buffers: case Arch::rrcall_init_preload: case Arch::rrcall_notify_syscall_hook_exit: case Arch::shmat: case Arch::shmdt: case Arch::unshare: return PREVENT_SWITCH; default: // Invalid syscalls return -ENOSYS. Assume any such // result means the syscall was completely ignored by the // kernel so it's OK for us to not do anything special. // Other results mean we probably need to understand this // syscall, but we don't. syscall_state.expect_errno = ENOSYS; return PREVENT_SWITCH; } } static Switchable rec_prepare_syscall_internal( Task* t, TaskSyscallState& syscall_state) { RR_ARCH_FUNCTION(rec_prepare_syscall_arch, t->arch(), t, syscall_state) } Switchable rec_prepare_syscall(Task* t) { auto& syscall_state = syscall_state_property.get_or_create(*t); syscall_state.init(t); Switchable s = rec_prepare_syscall_internal(t, syscall_state); int syscallno = t->ev().Syscall().number; if (is_sigreturn(syscallno, t->arch())) { // There isn't going to be an exit event for this syscall, so remove // syscall_state now. syscall_state_property.remove(*t); return s; } return syscall_state.done_preparing(s); } template static void rec_prepare_restart_syscall_arch(Task* t, TaskSyscallState& syscall_state) { int syscallno = t->ev().Syscall().number; switch (syscallno) { case Arch::nanosleep: /* Hopefully uniquely among syscalls, nanosleep() * requires writing to its remaining-time outparam * *only if* the syscall fails with -EINTR. When a * nanosleep() is interrupted by a signal, we don't * know a priori whether it's going to be eventually * restarted or not. (Not easily, anyway.) So we * don't know whether it will eventually return -EINTR * and would need the outparam written. To resolve * that, we do what the kernel does, and update the * outparam at the -ERESTART_RESTART interruption * regardless. 
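 *
 * (For reference, the userspace contract being preserved is roughly:
 *    struct timespec req = { 1, 0 }, rem;
 *    if (nanosleep(&req, &rem) == -1 && errno == EINTR)
 *      ... rem holds the time left unslept ...
 * so |rem| only carries meaningful data on the interrupted path.)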
*/ syscall_state.process_syscall_results(); break; case Arch::wait4: case Arch::waitid: case Arch::waitpid: { Registers r = t->regs(); r.set_original_syscallno( syscall_state.syscall_entry_registers->original_syscallno()); t->set_regs(r); t->in_wait_type = WAIT_TYPE_NONE; break; } } } static void rec_prepare_restart_syscall_internal( Task* t, TaskSyscallState& syscall_state) { RR_ARCH_FUNCTION(rec_prepare_restart_syscall_arch, t->arch(), t, syscall_state); } void rec_prepare_restart_syscall(Task* t) { auto& syscall_state = *syscall_state_property.get(*t); rec_prepare_restart_syscall_internal(t, syscall_state); syscall_state_property.remove(*t); } enum ScratchAddrType { FIXED_ADDRESS, DYNAMIC_ADDRESS }; /* Pointer used when running RR in WINE. Memory below this address is unmapped by WINE immediately after exec, so start the scratch buffer here. */ static const uintptr_t FIXED_SCRATCH_PTR = 0x68000000; static void init_scratch_memory(Task* t, ScratchAddrType addr_type = DYNAMIC_ADDRESS) { const int scratch_size = 512 * page_size(); size_t sz = scratch_size; // The PROT_EXEC looks scary, and it is, but it's to prevent // this region from being coalesced with another anonymous // segment mapped just after this one. If we named this // segment, we could remove this hack. int prot = PROT_READ | PROT_WRITE | PROT_EXEC; int flags = MAP_PRIVATE | MAP_ANONYMOUS; { /* initialize the scratchpad for blocking system calls */ AutoRemoteSyscalls remote(t); if (addr_type == DYNAMIC_ADDRESS) { t->scratch_ptr = remote.infallible_mmap_syscall(remote_ptr<void>(), sz, prot, flags, -1, 0); } else { t->scratch_ptr = remote.infallible_mmap_syscall(remote_ptr<void>(FIXED_SCRATCH_PTR), sz, prot, flags | MAP_FIXED, -1, 0); } t->scratch_size = scratch_size; } // record this mmap for the replay Registers r = t->regs(); uintptr_t saved_result = r.syscall_result(); r.set_syscall_result(t->scratch_ptr); t->set_regs(r); KernelMapping km = t->vm()->map(t->scratch_ptr, sz, prot, flags, 0, string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE); struct stat stat; memset(&stat, 0, sizeof(stat)); auto record_in_trace = t->trace_writer().write_mapped_region(km, stat); ASSERT(t, record_in_trace == TraceWriter::DONT_RECORD_IN_TRACE); r.set_syscall_result(saved_result); t->set_regs(r); } static void process_execve(Task* t, TaskSyscallState& syscall_state) { Registers r = t->regs(); if (r.syscall_failed()) { return; } t->post_exec_syscall(*syscall_state.exec_saved_event); t->record_session().trace_writer().write_task_event( *syscall_state.exec_saved_event); KernelMapping vvar; // Write out stack mappings first since during replay we need to set up the // stack before any files get mapped. vector<KernelMapping> stacks; for (auto m : t->vm()->maps()) { auto& km = m.map; if (km.is_stack()) { stacks.push_back(km); } else if (km.is_vvar()) { vvar = km; } } { AutoRemoteSyscalls remote(t, AutoRemoteSyscalls::DISABLE_MEMORY_PARAMS); if (vvar.size()) { // We're not going to map [vvar] during replay --- that wouldn't // make sense, since it contains data from the kernel that isn't correct // for replay, and we patch out the vdso syscalls that would use it. // Unmapping it now makes recording look more like replay. // Also note that under 4.0.7-300.fc22.x86_64 (at least) /proc/<pid>/mem // can't read the contents of [vvar].
remote.infallible_syscall(syscall_number_for_munmap(remote.arch()), vvar.start(), vvar.size()); t->vm()->unmap(vvar.start(), vvar.size()); } for (auto& km : stacks) { auto mode = t->trace_writer().write_mapped_region( km, km.fake_stat(), TraceWriter::EXEC_MAPPING); ASSERT(t, mode == TraceWriter::RECORD_IN_TRACE); auto buf = t->read_mem(km.start().cast(), km.size()); t->trace_writer().write_raw(buf.data(), km.size(), km.start()); // Remove MAP_GROWSDOWN from stacks by remapping the memory and // writing the contents back. int flags = (km.flags() & ~MAP_GROWSDOWN) | MAP_ANONYMOUS; remote.infallible_syscall(syscall_number_for_munmap(remote.arch()), km.start(), km.size()); if (!t->vm()->has_mapping(km.start() - page_size())) { // Unmap an extra page at the start; this seems to be necessary // to properly wipe out the growsdown mapping. Doing it as a separate // munmap call also seems to be necessary. remote.infallible_syscall(syscall_number_for_munmap(remote.arch()), km.start() - page_size(), page_size()); } remote.infallible_mmap_syscall(km.start(), km.size(), km.prot(), flags, -1, 0); t->write_mem(km.start().cast(), buf.data(), buf.size()); } } // The kernel may zero part of the last page in each data mapping according // to ELF BSS metadata. So we record the last page of each data mapping in // the trace. vector > pages_to_record; for (auto m : t->vm()->maps()) { auto& km = m.map; if (km.start() == AddressSpace::rr_page_start()) { continue; } if (km.is_stack() || km.is_vsyscall()) { // [stack] has already been handled. // [vsyscall] can't be read via /proc//mem, *should* // be the same across all execs, and can't be munmapped so we can't fix // it even if it does vary. Plus no-one should be using it anymore. continue; } struct stat st; if (stat(km.fsname().c_str(), &st) != 0) { st = km.fake_stat(); } if (t->trace_writer().write_mapped_region(km, st, TraceWriter::EXEC_MAPPING) == TraceWriter::RECORD_IN_TRACE) { if (st.st_size > 0) { off64_t end = (off64_t)st.st_size - km.file_offset_bytes(); t->record_remote(km.start(), min(end, (off64_t)km.size())); } else { // st_size is not valid. Some device files are mmappable but have zero // size. We also take this path if there's no file at all (vdso etc). t->record_remote(km.start(), km.size()); } } else { // See https://github.com/mozilla/rr/issues/1568; in some cases // after exec we have memory areas that are rwx. These areas have // a trailing page that may be partially zeroed by the kernel. Record the // trailing page of every mapping just to be simple and safe. pages_to_record.push_back(km.end() - page_size()); } } init_scratch_memory(t, FIXED_ADDRESS); for (auto& p : pages_to_record) { t->record_remote(p, page_size()); } // Patch LD_PRELOAD and VDSO after saving the mappings. Replay will apply // patches to the saved mappings. t->vm()->monkeypatcher().patch_after_exec(t); } static void process_mmap(Task* t, size_t length, int prot, int flags, int fd, off_t offset_pages) { size_t size = ceil_page_size(length); off64_t offset = offset_pages * 4096; if (t->regs().syscall_failed()) { // We purely emulate failed mmaps. return; } remote_ptr addr = t->regs().syscall_result(); if (flags & MAP_ANONYMOUS) { if (flags & MAP_PRIVATE) { // Anonymous mappings are by definition not backed by any file-like // object, and are initialized to zero, so there's no nondeterminism to // record. 
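// (Hence no trace data is needed here: replay can recreate this mapping
// bit-identically with a plain
//    mmap(addr, size, prot, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0)
// since anonymous private memory always starts out zero-filled.)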
t->vm()->map(addr, size, prot, flags, 0, string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE); } else { ASSERT(t, !(flags & MAP_GROWSDOWN)); // Read the kernel's mapping. There doesn't seem to be any other way to // get the correct device/inode numbers. Fortunately anonymous shared // mappings are rare. KernelMapping kernel_info = t->vm()->read_kernel_mapping(t, addr); KernelMapping km = t->vm()->map(addr, size, prot, flags, 0, kernel_info.fsname(), kernel_info.device(), kernel_info.inode()); auto d = t->trace_writer().write_mapped_region(km, km.fake_stat()); ASSERT(t, d == TraceWriter::DONT_RECORD_IN_TRACE); } return; } ASSERT(t, fd >= 0) << "Valid fd required for file mapping"; ASSERT(t, !(flags & MAP_GROWSDOWN)); // TODO: save a reflink copy of the resource to the // trace directory as |fs/[st_dev].[st_inode]|. Then // we wouldn't have to care about looking up a name // for the resource. auto result = t->stat_fd(fd); string file_name = t->file_name_of_fd(fd); KernelMapping km = t->vm()->map(addr, size, prot, flags, offset, file_name, result.st_dev, result.st_ino); if (t->trace_writer().write_mapped_region(km, result) == TraceWriter::RECORD_IN_TRACE) { if (result.st_size > 0) { off64_t end = (off64_t)result.st_size - offset; t->record_remote(addr, min(end, (off64_t)size)); } else { // st_size is not valid. Some device files are mmappable but have zero // size. t->record_remote(addr, size); } } if ((prot & PROT_WRITE) && (flags & MAP_SHARED)) { LOG(debug) << file_name << " is SHARED|WRITEABLE; that's not handled " "correctly yet. Optimistically hoping it's not " "written by programs outside the rr tracee " "tree."; } t->vm()->monkeypatcher().patch_after_mmap(t, addr, size, offset_pages, fd); } static void process_shmat(Task* t, int shmid, int shm_flags, remote_ptr addr) { if (t->regs().syscall_failed()) { // We purely emulate failed shmats. return; } struct shmid64_ds ds; int ret = _shmctl(shmid, IPC_STAT, &ds); ASSERT(t, !ret) << "shmid should be readable by rr since rr has the same " "UID as tracees"; size_t size = ceil_page_size(ds.shm_segsz); int prot = shm_flags_to_mmap_prot(shm_flags); int flags = MAP_SHARED; // Read the kernel's mapping for the shm segment. There doesn't seem to be // any other way to get the correct device number. (The inode number seems to // be the shm key.) This should be OK since SysV shmem is not used very much // and reading /proc//maps should be reasonably cheap. KernelMapping kernel_info = t->vm()->read_kernel_mapping(t, addr); KernelMapping km = t->vm()->map(addr, size, prot, flags, 0, kernel_info.fsname(), kernel_info.device(), kernel_info.inode()); if (t->trace_writer().write_mapped_region(km, km.fake_stat()) == TraceWriter::RECORD_IN_TRACE) { t->record_remote(addr, size); } LOG(debug) << "Optimistically hoping that SysV segment is not used outside " "of tracees"; } template static void process_fork(Task* t, TaskSyscallState& syscall_state) { if (t->regs().syscall_result_signed() < 0) { // fork failed. return; } // Note that the tid returned in the syscall result may be in a pid // namespace that's different from ours, so we should avoid using it // directly. Instead the new task will have been stashed in syscall_state. 
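// (For example, a tracee that created a new pid namespace may see its
// child as pid 2, while rr, in the root namespace, knows that task by a
// completely different tid; only the stashed Task* is meaningful to rr.)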
Task* new_task = syscall_state.new_task; ASSERT(t, new_task) << "new_task not found"; t->record_session().trace_writer().write_task_event( TraceTaskEvent(new_task->tid, t->tid)); init_scratch_memory(new_task); } template static void process_clone(Task* t, TaskSyscallState& syscall_state) { uintptr_t flags = syscall_state.syscall_entry_registers->arg1(); // Restore modified registers in cloning task Registers r = t->regs(); r.set_arg1(flags); // On a 3.19.0-39-generic #44-Ubuntu kernel we have observed clone() // clearing the parity flag internally. r.set_flags(syscall_state.syscall_entry_registers->flags()); t->set_regs(r); if (t->regs().syscall_result_signed() < 0) { // clone failed. return; } // Note that the tid returned in the syscall result may be in a pid // namespace that's different from ours, so we should avoid using it // directly. Instead the new task will have been stashed in syscall_state. Task* new_task = syscall_state.new_task; ASSERT(t, new_task) << "new_task not found"; // Restore modified registers in cloned task Registers new_r = new_task->regs(); new_r.set_arg1(flags); new_task->set_regs(new_r); new_task->push_event(SyscallEvent(t->ev().Syscall().number, t->arch())); /* record child id here */ remote_ptr* stack_not_needed = nullptr; remote_ptr parent_tid_in_parent, parent_tid_in_child; remote_ptr tls_in_parent, tls_in_child; remote_ptr child_tid_in_parent, child_tid_in_child; extract_clone_parameters(t, stack_not_needed, &parent_tid_in_parent, &tls_in_parent, &child_tid_in_parent); extract_clone_parameters(new_task, stack_not_needed, &parent_tid_in_child, &tls_in_child, &child_tid_in_child); // If these flags aren't set, the corresponding clone parameters may be // invalid pointers, so make sure they're ignored. if (!(flags & CLONE_PARENT_SETTID)) { parent_tid_in_parent = nullptr; parent_tid_in_child = nullptr; } if (!(flags & CLONE_CHILD_SETTID)) { child_tid_in_child = nullptr; } if (!(flags & CLONE_SETTLS)) { tls_in_parent = nullptr; tls_in_child = nullptr; } t->record_remote_even_if_null(parent_tid_in_parent); if (Arch::clone_tls_type == Arch::UserDescPointer) { t->record_remote_even_if_null( tls_in_parent.cast()); new_task->record_remote_even_if_null( tls_in_child.cast()); } else { assert(Arch::clone_tls_type == Arch::PthreadStructurePointer); } new_task->record_remote_even_if_null(parent_tid_in_child); new_task->record_remote_even_if_null(child_tid_in_child); new_task->pop_syscall(); t->record_session().trace_writer().write_task_event( TraceTaskEvent(new_task->tid, t->tid, flags)); init_scratch_memory(new_task); } template static string extra_expected_errno_info(Task* t, TaskSyscallState& syscall_state) { stringstream ss; switch (syscall_state.expect_errno) { case ENOSYS: ss << "; execution of syscall unsupported by rr"; break; case EINVAL: switch (t->regs().original_syscallno()) { case Arch::ioctl: { int request = (int)t->regs().arg2_signed(); int type = _IOC_TYPE(request); int nr = _IOC_NR(request); int dir = _IOC_DIR(request); int size = _IOC_SIZE(request); ss << "; Unknown ioctl(" << HEX(request) << "): type:" << HEX(type) << " nr:" << HEX(nr) << " dir:" << HEX(dir) << " size:" << size << " addr:" << HEX(t->regs().arg3()); break; } case Arch::quotactl: ss << "; unknown quotactl(" << HEX(t->regs().arg1() >> SUBCMDSHIFT) << ")"; break; case Arch::fcntl: case Arch::fcntl64: ss << "; unknown fcntl(" << HEX((int)t->regs().arg2_signed()) << ")"; break; case Arch::prctl: ss << "; unknown prctl(" << HEX((int)t->regs().arg1_signed()) << ")"; break; case 
Arch::arch_prctl: ss << "; unknown arch_prctl(" << HEX((int)t->regs().arg1_signed()) << ")"; break; case Arch::socketcall: ss << "; unknown socketcall(" << HEX((int)t->regs().arg1_signed()) << ")"; break; case Arch::ipc: ss << "; unknown ipc(" << HEX((int)t->regs().arg1_signed()) << ")"; break; case Arch::futex: ss << "; unknown futex(" << HEX((int)t->regs().arg2_signed() & FUTEX_CMD_MASK) << ")"; break; case Arch::waitid: ss << "; unknown waitid(" << HEX((idtype_t)t->regs().arg1()) << ")"; break; case Arch::seccomp: ss << "; unknown seccomp(" << HEX((unsigned int)t->regs().arg1()) << ")"; break; case Arch::madvise: ss << "; unknown madvise(" << (int)t->regs().arg3() << ")"; break; } break; case EIO: switch (t->regs().original_syscallno()) { case Arch::ptrace: ss << "; unsupported ptrace(" << HEX((int)t->regs().arg1()) << " [" << ptrace_req_name((int)t->regs().arg1_signed()) << "])"; break; } break; } return ss.str(); } template static void rec_process_syscall_arch(Task* t, TaskSyscallState& syscall_state) { int syscallno = t->ev().Syscall().number; LOG(debug) << t->tid << ": processing: " << t->ev() << " -- time: " << t->trace_time(); t->on_syscall_exit(syscallno, t->regs()); if (const struct syscallbuf_record* rec = t->desched_rec()) { t->record_local(t->syscallbuf_child.cast() + (rec->extra_data - (uint8_t*)t->syscallbuf_hdr), rec->size - sizeof(*rec), (uint8_t*)rec->extra_data); return; } if (syscall_state.expect_errno) { ASSERT(t, t->regs().syscall_result_signed() == -syscall_state.expect_errno) << "Expected " << errno_name(syscall_state.expect_errno) << " for '" << t->syscall_name(syscallno) << "' but got result " << t->regs().syscall_result_signed() << extra_expected_errno_info(t, syscall_state); return; } // Here we handle syscalls that need work that can only happen after the // syscall completes --- and that our TaskSyscallState infrastructure can't // handle. switch (syscallno) { case Arch::clone: { process_clone(t, syscall_state); break; } case Arch::vfork: { Registers r = t->regs(); r.set_original_syscallno(Arch::vfork); t->set_regs(r); process_fork(t, syscall_state); break; } case Arch::fork: process_fork(t, syscall_state); break; case Arch::execve: process_execve(t, syscall_state); break; case Arch::brk: { remote_ptr old_brk = ceil_page_size(t->vm()->current_brk()); remote_ptr new_brk = ceil_page_size(t->regs().syscall_result()); KernelMapping km; if (old_brk < new_brk) { // Read the kernel's mapping. There doesn't seem to be any other way to // get the correct prot bits for heaps. Usually it's READ|WRITE but // there seem to be exceptions depending on system settings. 
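// (Conceptually, read_kernel_mapping() parses the /proc/<pid>/maps line
// covering |old_brk|, e.g.
//    08240000-08262000 rw-p 00000000 00:00 0    [heap]
// and turns it into a KernelMapping carrying those prot/flags bits.)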
KernelMapping kernel_info = t->vm()->read_kernel_mapping(t, old_brk); ASSERT(t, kernel_info.device() == KernelMapping::NO_DEVICE); ASSERT(t, kernel_info.inode() == KernelMapping::NO_INODE); km = kernel_info.subrange(old_brk, new_brk); } else { // Write a dummy KernelMapping that indicates an unmap km = KernelMapping(new_brk, old_brk, string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE, 0, 0, 0); } auto d = t->trace_writer().write_mapped_region(km, km.fake_stat()); ASSERT(t, d == TraceWriter::DONT_RECORD_IN_TRACE); t->vm()->brk(t->regs().syscall_result(), km.prot()); break; } case Arch::mmap: switch (Arch::mmap_semantics) { case Arch::StructArguments: { auto args = t->read_mem( remote_ptr<typename Arch::mmap_args>(t->regs().arg1())); process_mmap(t, args.len, args.prot, args.flags, args.fd, args.offset / 4096); break; } case Arch::RegisterArguments: { Registers r = t->regs(); r.set_arg4(syscall_state.syscall_entry_registers->arg4_signed()); t->set_regs(r); process_mmap(t, (size_t)r.arg2(), (int)r.arg3_signed(), (int)r.arg4_signed(), (int)r.arg5_signed(), ((off_t)r.arg6_signed()) / 4096); break; } } break; case Arch::mmap2: { Registers r = t->regs(); r.set_arg4(syscall_state.syscall_entry_registers->arg4_signed()); t->set_regs(r); process_mmap(t, (size_t)r.arg2(), (int)r.arg3_signed(), (int)r.arg4_signed(), (int)r.arg5_signed(), (off_t)r.arg6_signed()); break; } case Arch::shmat: process_shmat(t, (int)t->regs().arg1_signed(), (int)t->regs().arg3_signed(), t->regs().syscall_result()); break; case Arch::ipc: switch ((int)t->regs().arg1_signed()) { case SHMAT: { auto out_ptr = t->read_mem( remote_ptr(t->regs().arg4())); process_shmat(t, (int)t->regs().arg2_signed(), (int)t->regs().arg3_signed(), out_ptr); break; } default: break; } break; case Arch::nanosleep: { /* If the sleep completes, the kernel doesn't * write back to the remaining-time * argument. */ if (!(int)t->regs().syscall_result_signed()) { syscall_state.write_back = TaskSyscallState::NO_WRITE_BACK; } break; } case Arch::open: { string pathname = t->read_c_str(remote_ptr(t->regs().arg1())); if (is_blacklisted_filename(pathname.c_str())) { /* NB: the file will still be open in the * process's file table, but let's hope this * gross hack dies before we have to worry * about that. */ LOG(warn) << "Cowardly refusing to open " << pathname; Registers r = t->regs(); r.set_syscall_result(-ENOENT); t->set_regs(r); } break; } case Arch::rt_sigsuspend: case Arch::sigsuspend: t->sigsuspend_blocked_sigs = nullptr; break; case Arch::rt_sigprocmask: case Arch::sigprocmask: { remote_ptr setp = t->regs().arg2(); if (!setp.is_null()) { // Restore modified sig_set t->write_bytes_helper(setp, syscall_state.saved_data.size(), syscall_state.saved_data.data()); } break; } case Arch::close: case Arch::dup2: case Arch::dup3: case Arch::fcntl: case Arch::fcntl64: case Arch::ptrace: case Arch::sched_setaffinity: case Arch::mprotect: { // Restore the registers that we may have altered.
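// The pairing here is: at syscall entry these cases stashed
//    syscall_state.syscall_entry_registers =
//        unique_ptr<Registers>(new Registers(t->regs()));
// and now, on exit, the stashed argument registers are copied back so the
// tracee never observes the values rr substituted.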
Registers r = t->regs(); r.set_arg1(syscall_state.syscall_entry_registers->arg1()); r.set_arg2(syscall_state.syscall_entry_registers->arg2()); r.set_arg3(syscall_state.syscall_entry_registers->arg3()); t->set_regs(r); break; } case Arch::waitpid: case Arch::wait4: case Arch::waitid: { t->in_wait_type = WAIT_TYPE_NONE; // Restore possibly-modified registers Registers r = t->regs(); r.set_arg1(syscall_state.syscall_entry_registers->arg1()); r.set_arg2(syscall_state.syscall_entry_registers->arg2()); r.set_arg3(syscall_state.syscall_entry_registers->arg3()); r.set_arg4(syscall_state.syscall_entry_registers->arg4()); r.set_original_syscallno( syscall_state.syscall_entry_registers->original_syscallno()); t->set_regs(r); if (syscall_state.ptraced_tracee) { // Finish emulation of ptrace result Registers r = t->regs(); r.set_syscall_result(syscall_state.ptraced_tracee->tid); t->set_regs(r); if (syscallno == Arch::waitid) { remote_ptr sip = r.arg3(); if (!sip.is_null()) { typename Arch::siginfo_t si; memset(&si, 0, sizeof(si)); si.si_signo = SIGCHLD; si.si_code = CLD_TRAPPED; si._sifields._sigchld.si_pid_ = syscall_state.ptraced_tracee->tgid(); si._sifields._sigchld.si_uid_ = syscall_state.ptraced_tracee->getuid(); si._sifields._sigchld.si_status_ = syscall_state.ptraced_tracee->emulated_ptrace_stop_code; t->write_mem(sip, si); } } else { remote_ptr statusp = r.arg2(); if (!statusp.is_null()) { t->write_mem( statusp, syscall_state.ptraced_tracee->emulated_ptrace_stop_code); } } if (syscallno == Arch::waitid && (r.arg4() & WNOWAIT)) { // Leave the child in a waitable state } else { syscall_state.ptraced_tracee->emulated_ptrace_stop_code = 0; } } break; } case Arch::prctl: { // Restore arg1 in case we modified it to disable the syscall Registers r = t->regs(); r.set_arg1(syscall_state.syscall_entry_registers->arg1()); t->set_regs(r); if (t->regs().arg1() == PR_SET_SECCOMP && t->session().can_validate()) { t->session() .as_record() ->seccomp_filter_rewriter() .install_patched_seccomp_filter(t); // install_patched_seccomp_filter can set registers to indicate // failure. if (!t->regs().syscall_failed()) { t->prctl_seccomp_status = 2; } } break; } case Arch::seccomp: { // Restore arg1 in case we modified it to disable the syscall Registers r = t->regs(); r.set_arg1(syscall_state.syscall_entry_registers->arg1()); t->set_regs(r); if (t->regs().arg1() == SECCOMP_SET_MODE_FILTER) { t->session() .as_record() ->seccomp_filter_rewriter() .install_patched_seccomp_filter(t); // install_patched_seccomp_filter can set registers to indicate // failure. 
ASSERT(t, t->session().can_validate()) << "no seccomp calls during spawn"; if (!t->regs().syscall_failed()) { t->prctl_seccomp_status = 2; } } break; } case SYS_rrcall_init_buffers: t->init_buffers(nullptr); break; case SYS_rrcall_init_preload: { t->vm()->at_preload_init(t); t->at_preload_init(); Registers r = t->regs(); r.set_syscall_result(0); t->set_regs(r); break; } case SYS_rrcall_notify_syscall_hook_exit: { t->syscallbuf_hdr->notify_on_syscall_hook_exit = false; t->record_local( REMOTE_PTR_FIELD(t->syscallbuf_child, notify_on_syscall_hook_exit), &t->syscallbuf_hdr->notify_on_syscall_hook_exit); struct rrcall_params { typename Arch::unsigned_word result; typename Arch::unsigned_word original_syscallno; }; Registers r = t->regs(); auto params_ptr = r.sp() + sizeof(typename Arch::unsigned_word); auto params = t->read_mem(params_ptr.cast()); r.set_syscall_result((uintptr_t)params.result); r.set_original_syscallno((intptr_t)params.original_syscallno); t->set_regs(r); break; } } } static void rec_process_syscall_internal(Task* t, TaskSyscallState& syscall_state) { RR_ARCH_FUNCTION(rec_process_syscall_arch, t->arch(), t, syscall_state) } void rec_process_syscall(Task* t) { auto& syscall_state = *syscall_state_property.get(*t); rec_process_syscall_internal(t, syscall_state); syscall_state.process_syscall_results(); syscall_state_property.remove(*t); } rr-4.1.0/src/record_syscall.h000066400000000000000000000021241265436462100161170ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_PROCESS_SYSCALL_H_ #define RR_PROCESS_SYSCALL_H_ #include "task.h" /** * Prepare |t| to enter its current syscall event. Return ALLOW_SWITCH if * a context-switch is allowed for |t|, PREVENT_SWITCH if not. * * Set |*kernel_sync_addr| to non-nullptr to force waiting on that memory * cell in the child's address space to become |sync_val|. This is an * overly general mechanism that's used for FUTEX_LOCK_PI. If you're * not FUTEX_LOCK_PI, you probably shouldn't be using this. */ Switchable rec_prepare_syscall(Task* t); /** * Prepare |t| for its current syscall event to be interrupted and * possibly restarted. */ void rec_prepare_restart_syscall(Task* t); /** * Inside a fork/clone syscall, notify that the new task created is new_task. */ void rec_set_syscall_new_task(Task* t, Task* new_task); /** * Restore any argument registers fudged for |t|'s current syscall and * store any nondeterministic outparam data. */ void rec_process_syscall(Task* t); #endif /* RR_PROCESS_SYSCALL_H_ */ rr-4.1.0/src/remote_code_ptr.cc000066400000000000000000000002341265436462100164170ustar00rootroot00000000000000#include "remote_code_ptr.h" std::ostream& operator<<(std::ostream& stream, remote_code_ptr p) { stream << (void*)p.register_value(); return stream; } rr-4.1.0/src/remote_code_ptr.h000066400000000000000000000045241265436462100162670ustar00rootroot00000000000000#ifndef RR_REMOTE_CODE_PTR_H_ #define RR_REMOTE_CODE_PTR_H_ #include #include #include "kernel_abi.h" /* * A pointer to code in the tracee address space. Convertible to a * remote_ptr. */ class remote_code_ptr { public: remote_code_ptr() : ptr(0) {} remote_code_ptr(uintptr_t ptr) : ptr(ptr) {} remote_code_ptr(std::nullptr_t null) : ptr(0) {} bool operator==(const remote_code_ptr& other) const { return ptr == other.ptr; } bool operator!=(const remote_code_ptr& other) const { return ptr != other.ptr; } // XXXkhuey this will have to get smarter once we have ARM. 
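// (On ARM, the low bit of a code address selects the Thumb instruction
// set, so naive integer arithmetic like |ptr + delta| below would need to
// preserve that bit; for x86/x86-64 code addresses it is plain
// arithmetic.)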
remote_code_ptr operator+(intptr_t delta) const { return remote_code_ptr(ptr + delta); } remote_code_ptr operator-(intptr_t delta) const { return remote_code_ptr(ptr - delta); } intptr_t operator-(remote_code_ptr other) const { return ptr - other.ptr; } // XXXkhuey this is somewhat arbitrary bool operator<(const remote_code_ptr& other) const { return ptr < other.ptr; } remote_code_ptr decrement_by_syscall_insn_length(SupportedArch arch) const { return remote_code_ptr(ptr - rr::syscall_instruction_length(arch)); } remote_code_ptr increment_by_syscall_insn_length(SupportedArch arch) const { return remote_code_ptr(ptr + rr::syscall_instruction_length(arch)); } remote_code_ptr decrement_by_bkpt_insn_length(SupportedArch arch) const { return remote_code_ptr(ptr - 1); } remote_code_ptr increment_by_bkpt_insn_length(SupportedArch arch) const { return remote_code_ptr(ptr + 1); } template <typename T> remote_ptr<T> to_data_ptr() const { return remote_ptr<T>(to_data_ptr_value()); } // Return the pointer in a form suitable for storing in a register. Only // intended for use by Registers and the operator << uintptr_t register_value() const { return ptr; } private: // Return the integer value for this pointer viewed as a data pointer. // A no-op on Intel architectures, will mask off the thumb bit on ARM. uintptr_t to_data_ptr_value() const { return ptr; } uintptr_t ptr; }; std::ostream& operator<<(std::ostream& stream, remote_code_ptr p); namespace std { template <> struct hash<remote_code_ptr> { size_t operator()(const remote_code_ptr& ptr) const { return hash<uintptr_t>()(ptr.register_value()); } }; } // namespace std #endif // RR_REMOTE_CODE_PTR_H_ rr-4.1.0/src/remote_ptr.h000066400000000000000000000062461265436462100153000ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_REMOTE_PTR_H_ #define RR_REMOTE_PTR_H_ #include #include /** * Number of bytes to use as the element size when doing pointer arithmetic * on this type. We specialize 'void' to use 1 byte to make a lot of our * calculations easier. */ template <typename T> size_t pointer_arithmetic_size() { return sizeof(T); } template <> inline size_t pointer_arithmetic_size<void>() { return 1; } /** * A pointer to data in some tracee address space. * This lets us distinguish between real, usable pointers in rr's address space * and pointers that only make sense in a tracee address space.
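 *
 * Illustrative usage, given some Task-like |t| (sketch):
 *
 *   remote_ptr<int> p = t->regs().arg2(); // an address in the tracee
 *   int value = t->read_mem(p);           // explicit fetch required
 *
 * i.e. a remote_ptr can never be dereferenced directly; all access goes
 * through Task's read/write APIs.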
*/ template <typename T> class remote_ptr { public: remote_ptr() : ptr(0) {} remote_ptr(uintptr_t ptr) : ptr(ptr) {} remote_ptr(std::nullptr_t null) : ptr(0) {} template <typename U> remote_ptr(remote_ptr<U> p) : ptr(p.as_int()) { consume_dummy(static_cast<U*>(nullptr)); } uintptr_t as_int() const { return ptr; } remote_ptr operator+(intptr_t delta) const { return remote_ptr(ptr + delta * arith_size()); } remote_ptr operator-(intptr_t delta) const { return remote_ptr(ptr - delta * arith_size()); } remote_ptr& operator+=(intptr_t delta) { ptr += delta * arith_size(); return *this; } remote_ptr& operator-=(intptr_t delta) { ptr -= delta * arith_size(); return *this; } intptr_t operator-(remote_ptr other) const { return (ptr - other.ptr) / arith_size(); } remote_ptr& operator++() { ptr += arith_size(); return *this; } remote_ptr operator++(int) { uintptr_t p = ptr; ptr += arith_size(); return p; } remote_ptr& operator--() { ptr -= arith_size(); return *this; } remote_ptr operator--(int) { uintptr_t p = ptr; ptr -= arith_size(); return p; } template <typename U> remote_ptr<U> cast() const { return remote_ptr<U>(ptr); } bool operator!() const { return !ptr; } bool operator<(const remote_ptr& other) const { return ptr < other.ptr; } bool operator<=(const remote_ptr& other) const { return ptr <= other.ptr; } bool operator==(const remote_ptr& other) const { return ptr == other.ptr; } bool operator!=(const remote_ptr& other) const { return ptr != other.ptr; } bool operator>(const remote_ptr& other) const { return ptr > other.ptr; } bool operator>=(const remote_ptr& other) const { return ptr >= other.ptr; } bool is_null() const { return !ptr; } template <typename U> remote_ptr<U> field(U& dummy) { return remote_ptr<U>(ptr + reinterpret_cast<uintptr_t>(&dummy)); } T* dummy() { return nullptr; } size_t referent_size() { return sizeof(T); } private: static void consume_dummy(T*) {} static size_t arith_size() { return pointer_arithmetic_size<T>(); } uintptr_t ptr; }; /** * returns a remote_ptr pointing to field f of the struct pointed to by * remote_ptr p */ #define REMOTE_PTR_FIELD(p, f) (p).field((p).dummy()->f) template <typename T> std::ostream& operator<<(std::ostream& stream, remote_ptr<T> p) { stream << (void*)p.as_int(); return stream; } #endif /* RR_REMOTE_PTR_H_ */ rr-4.1.0/src/replay_syscall.cc000066400000000000000000001337121265436462100163030ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "ProcessSyscallRep" #include "replay_syscall.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "preload/preload_interface.h" #include "AutoRemoteSyscalls.h" #include "EmuFs.h" #include "kernel_abi.h" #include "kernel_metadata.h" #include "log.h" #include "ReplaySession.h" #include "task.h" #include "TraceStream.h" #include "util.h" /* Uncomment this to check syscall names and numbers defined in syscalls.py against the definitions in unistd.h. This may cause the build to fail if unistd.h is slightly out of date, so it's not turned on by default. */ //#define CHECK_SYSCALL_NUMBERS using namespace std; using namespace rr; // XXX: x86-only currently. #ifdef CHECK_SYSCALL_NUMBERS // Hack because our 'break' syscall is called '_break' #define SYS__break SYS_break #include "CheckSyscallNumbers.generated" #endif // CHECK_SYSCALL_NUMBERS enum SyscallEntryOrExit { SYSCALL_ENTRY, SYSCALL_EXIT }; /** * Return the symbolic name of |state|, or "???state" if unknown.
*/ static const char* state_name(SyscallEntryOrExit state) { switch (state) { #define CASE(_id) \ case _id: \ return #_id CASE(SYSCALL_ENTRY); CASE(SYSCALL_EXIT); #undef CASE default: return "???state"; } } static string maybe_dump_written_string(Task* t) { if (!is_write_syscall(t->regs().original_syscallno(), t->arch())) { return ""; } size_t len = min(1000, t->regs().arg3()); vector buf; buf.resize(len + 1); buf.resize(t->read_bytes_fallible(t->regs().arg2(), len, buf.data()) + 1); buf[buf.size() - 1] = 0; return " \"" + string(buf.data()) + "\""; } /** * Proceeds until the next system call, which is being executed. */ static void __ptrace_cont(Task* t, int expect_syscallno) { do { uintptr_t saved_r11 = t->arch() == x86_64 ? t->regs().r11() : 0; t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS); if (t->arch() == x86_64) { // Restore previous R11 value. R11 gets reset to RFLAGS by the syscall, // and RFLAGS might have changed since we first entered the syscall // we're replaying (we reset it to 0x246 in fixup_syscall_registers). Registers r = t->regs(); r.set_r11(saved_r11); t->set_regs(r); } } while (ReplaySession::is_ignored_signal(t->stop_sig())); ASSERT(t, !t->pending_sig()) << "Expected no pending signal, but got " << t->pending_sig(); /* check if we are synchronized with the trace -- should never fail */ int current_syscall = t->regs().original_syscallno(); ASSERT(t, current_syscall == expect_syscallno) << "Should be at " << t->syscall_name(expect_syscallno) << ", but instead at " << t->syscall_name(current_syscall) << maybe_dump_written_string(t); } static void init_scratch_memory(Task* t, const KernelMapping& km, const TraceReader::MappedData& data) { /* Initialize the scratchpad as the recorder did, but make it * PROT_NONE. The idea is just to reserve the address space so * the replayed process address map looks like the recorded * process, if it were to be probed by madvise or some other * means. But we make it PROT_NONE so that rogue reads/writes * to the scratch memory are caught. */ ASSERT(t, data.source == TraceReader::SOURCE_ZERO); t->scratch_ptr = km.start(); t->scratch_size = km.size(); size_t sz = t->scratch_size; int prot = PROT_NONE; int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED; AutoRemoteSyscalls remote(t); remote.infallible_mmap_syscall(t->scratch_ptr, sz, prot, flags, -1, 0); t->vm()->map(t->scratch_ptr, sz, prot, flags, 0, string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE, &km); } /** * If scratch data was incidentally recorded for the current desched'd * but write-only syscall, then do a no-op restore of that saved data * to keep the trace in sync. * * Syscalls like |write()| that may-block and are wrapped in the * preload library can be desched'd. When this happens, we save the * syscall record's "extra data" as if it were normal scratch space, * since it's used that way in effect. But syscalls like |write()| * that don't actually use scratch space don't ever try to restore * saved scratch memory during replay. So, this helper can be used * for that class of syscalls. */ static void maybe_noop_restore_syscallbuf_scratch(Task* t) { if (t->is_in_untraced_syscall()) { LOG(debug) << " noop-restoring scratch for write-only desched'd " << t->syscall_name(t->regs().original_syscallno()); t->set_data_from_trace(); } } /** * Return true iff the syscall represented by |frame| (either entry to * or exit from) failed. 
*/ static bool is_failed_syscall(Task* t, const TraceFrame& frame) { TraceFrame next_frame; if (ENTERING_SYSCALL == frame.event().Syscall().state) { next_frame = t->trace_reader().peek_to(t->rec_tid, frame.event().type(), EXITING_SYSCALL); return next_frame.regs().syscall_failed(); } return frame.regs().syscall_failed(); } static ReplayTraceStepType syscall_action(SyscallEntryOrExit state) { return state == SYSCALL_ENTRY ? TSTEP_ENTER_SYSCALL : TSTEP_EXIT_SYSCALL; } static TraceTaskEvent read_task_trace_event(Task* t, TraceTaskEvent::Type type) { TraceTaskEvent tte; while (true) { ASSERT(t, t->trace_reader().good()) << "Unable to find TraceTaskEvent; " "trace is corrupt (did you kill -9 " "rr?)"; tte = t->trace_reader().read_task_event(); if (tte.type() == type) { break; } } return tte; } template static void process_clone(Task* t, const TraceFrame& trace_frame, SyscallEntryOrExit state, ReplayTraceStep* step, unsigned long flags, int expect_syscallno) { if (is_failed_syscall(t, trace_frame)) { /* creation failed, emulate it */ return; } if (state == SYSCALL_ENTRY) { return; } step->action = TSTEP_RETIRE; if (Arch::clone == t->regs().original_syscallno()) { Registers r = t->regs(); // If we allow CLONE_UNTRACED then the child would escape from rr control // and we can't allow that. // Block CLONE_CHILD_CLEARTID because we'll emulate that ourselves. // Filter CLONE_VFORK too r.set_arg1(flags & ~(CLONE_UNTRACED | CLONE_CHILD_CLEARTID | CLONE_VFORK)); t->set_regs(r); } { // Prepare to restart syscall Registers r = t->regs(); r.set_syscallno(t->regs().original_syscallno()); r.set_ip(r.ip().decrement_by_syscall_insn_length(r.arch())); t->set_regs(r); } // Enter syscall __ptrace_cont(t, expect_syscallno); // The syscall may be interrupted. Keep trying it until we get the // ptrace event we're expecting. __ptrace_cont(t, expect_syscallno); while (!t->clone_syscall_is_complete()) { if (t->regs().syscall_result_signed() == -EAGAIN) { // clone() calls sometimes fail with -EAGAIN due to load issues or // whatever. We need to retry the system call until it succeeds. Reset // state to try the syscall again. Registers r = t->regs(); r.set_syscallno(trace_frame.regs().original_syscallno()); r.set_ip(trace_frame.regs().ip() - syscall_instruction_length(t->arch())); t->set_regs(r); // reenter syscall __ptrace_cont(t, expect_syscallno); // continue to exit __ptrace_cont(t, expect_syscallno); } else { __ptrace_cont(t, expect_syscallno); } } // Now continue again to get the syscall exit event. __ptrace_cont(t, expect_syscallno); ASSERT(t, !t->ptrace_event()) << "Unexpected ptrace event while waiting for syscall exit; got " << ptrace_event_name(t->ptrace_event()); Registers r = t->regs(); // Restore original_syscallno if vfork set it to fork r.set_original_syscallno(trace_frame.regs().original_syscallno()); // Restore the saved flags, to hide the fact that we may have // masked out CLONE_UNTRACED/CLONE_CHILD_CLEARTID. r.set_arg1(trace_frame.regs().arg1()); r.set_flags(trace_frame.regs().flags()); t->set_regs(r); // Dig the recorded tid out out of the trace. The tid value returned in // the recorded registers could be in a different pid namespace from rr's, // so we can't use it directly. TraceTaskEvent tte = read_task_trace_event( t, Arch::clone == t->regs().original_syscallno() ? 
TraceTaskEvent::CLONE : TraceTaskEvent::FORK); ASSERT(t, tte.parent_tid() == t->rec_tid); long rec_tid = tte.tid(); pid_t new_tid = t->get_ptrace_eventmsg_pid(); remote_ptr stack; remote_ptr tls; remote_ptr ctid; if (Arch::clone == t->regs().original_syscallno()) { remote_ptr* ptid_not_needed = nullptr; extract_clone_parameters(t, &stack, ptid_not_needed, &tls, &ctid); } Task* new_task = t->session().clone(t, clone_flags_to_task_flags(flags), stack, tls, ctid, new_tid, rec_tid); if (Arch::clone == t->regs().original_syscallno()) { /* FIXME: what if registers are non-null and contain an * invalid address? */ t->set_data_from_trace(); if (Arch::clone_tls_type == Arch::UserDescPointer) { t->set_data_from_trace(); new_task->set_data_from_trace(); } else { assert(Arch::clone_tls_type == Arch::PthreadStructurePointer); } new_task->set_data_from_trace(); new_task->set_data_from_trace(); // Fix registers in new task Registers new_r = new_task->regs(); new_r.set_original_syscallno(trace_frame.regs().original_syscallno()); new_r.set_arg1(trace_frame.regs().arg1()); new_task->set_regs(new_r); } if (!(CLONE_VM & flags)) { // It's hard to imagine a scenario in which it would // be useful to inherit breakpoints (along with their // refcounts) across a non-VM-sharing clone, but for // now we never want to do this. new_task->vm()->remove_all_breakpoints(); new_task->vm()->remove_all_watchpoints(); } t->set_return_value_from_trace(); t->validate_regs(); TraceReader::MappedData data; KernelMapping km = t->trace_reader().read_mapped_region(&data); init_scratch_memory(new_task, km, data); new_task->vm()->after_clone(); } static string find_exec_stub(SupportedArch arch) { string exe_path = exe_directory(); if (arch == x86 && NativeArch::arch() == x86_64) { exe_path += "exec_stub_32"; } else { exe_path += "exec_stub"; } return exe_path; } static void finish_direct_mmap(AutoRemoteSyscalls& remote, remote_ptr rec_addr, size_t length, int prot, int flags, off64_t mmap_offset_pages, const string& backing_file_name, off64_t backing_offset_pages, struct stat& real_file, string& real_file_name) { Task* t = remote.task(); int fd; LOG(debug) << "directly mmap'ing " << length << " bytes of " << backing_file_name << " at page offset " << HEX(backing_offset_pages); ASSERT(t, !(flags & MAP_GROWSDOWN)); /* Open in the tracee the file that was mapped during * recording. */ { AutoRestoreMem child_str(remote, backing_file_name.c_str()); /* We only need RDWR for shared writeable mappings. * Private mappings will happily COW from the mapped * RDONLY file. * * TODO: should never map any files writable */ int oflags = (MAP_SHARED & flags) && (PROT_WRITE & prot) ? O_RDWR : O_RDONLY; /* TODO: unclear if O_NOATIME is relevant for mmaps */ fd = remote.infallible_syscall(syscall_number_for_open(remote.arch()), child_str.get().as_int(), oflags); } /* And mmap that file. */ remote.infallible_mmap_syscall(rec_addr, length, /* (We let SHARED|WRITEABLE * mappings go through while * they're not handled properly, * but we shouldn't do that.) */ prot, flags | MAP_FIXED, fd, backing_offset_pages); // While it's open, grab the link reference. real_file = t->stat_fd(fd); real_file_name = t->file_name_of_fd(fd); /* Don't leak the tmp fd. The mmap doesn't need the fd to * stay open. 
*/ remote.infallible_syscall(syscall_number_for_close(remote.arch()), fd); } static void restore_mapped_region(AutoRemoteSyscalls& remote, const KernelMapping& km, const TraceReader::MappedData& data) { Task* t = remote.task(); ASSERT(t, km.flags() & MAP_PRIVATE) << "Shared mappings after exec not supported"; string real_file_name; dev_t device = KernelMapping::NO_DEVICE; ino_t inode = KernelMapping::NO_INODE; int flags = km.flags(); uint64_t offset_bytes = 0; switch (data.source) { case TraceReader::SOURCE_FILE: { struct stat real_file; offset_bytes = km.file_offset_bytes(); finish_direct_mmap(remote, km.start(), km.size(), km.prot(), km.flags(), offset_bytes / page_size(), data.file_name, data.file_data_offset_bytes / page_size(), real_file, real_file_name); device = real_file.st_dev; inode = real_file.st_ino; break; } case TraceReader::SOURCE_TRACE: case TraceReader::SOURCE_ZERO: flags |= MAP_ANONYMOUS; remote.infallible_mmap_syscall(km.start(), km.size(), km.prot(), (flags & ~MAP_GROWSDOWN) | MAP_FIXED, -1, 0); // The data, if any, will be written back by // Task::apply_all_data_records_from_trace break; default: ASSERT(t, false) << "Unknown data source"; break; } t->vm()->map(km.start(), km.size(), km.prot(), flags, offset_bytes, real_file_name, device, inode, &km); } static void process_execve(Task* t, const TraceFrame& trace_frame, SyscallEntryOrExit state, ReplayTraceStep* step) { if (SYSCALL_ENTRY == state) { return; } if (trace_frame.regs().syscall_failed()) { return; } step->action = TSTEP_RETIRE; /* First, exec a stub program */ string stub_filename = find_exec_stub(trace_frame.regs().arch()); // Setup memory and registers for the execve call. We don't need to save // the old values since they're going to be wiped out by execve. Registers regs = t->regs(); regs.set_ip(t->vm()->traced_syscall_ip()); remote_ptr remote_mem = floor_page_size(regs.sp()); // We write a zero word in the host size, not t's size, but that's OK, // since the host size must be bigger than t's size. // We pass no argv or envp, so exec params 2 and 3 just point to the NULL // word. t->write_mem(remote_mem.cast(), size_t(0)); regs.set_arg2(remote_mem); regs.set_arg3(remote_mem); remote_mem += sizeof(size_t); t->write_bytes_helper(remote_mem, stub_filename.size() + 1, stub_filename.c_str()); regs.set_arg1(remote_mem); regs.set_syscallno(syscall_number_for_execve(t->arch())); t->set_regs(regs); /* The original_syscallno is execve in the old architecture. The kernel does * not update the original_syscallno when the architecture changes across * an exec. */ int expect_syscallno = syscall_number_for_execve(t->arch()); /* Enter our execve syscall. */ __ptrace_cont(t, expect_syscallno); ASSERT(t, !t->pending_sig()) << "Stub exec failed on entry"; /* Proceed to the PTRACE_EVENT_EXEC. */ __ptrace_cont(t, expect_syscallno); ASSERT(t, t->ptrace_event() == PTRACE_EVENT_EXEC) << "Stub exec failed?"; /* Wait for the execve exit. 
*/ __ptrace_cont(t, expect_syscallno); vector kms; vector datas; ssize_t exe_km = -1; while (true) { TraceReader::MappedData data; bool found; KernelMapping km = t->trace_reader().read_mapped_region(&data, &found); if (!found) { break; } const string& file_name = km.fsname(); if ((km.prot() & PROT_EXEC) && file_name.size() > 0 && file_name[0] == '/' && file_name.rfind(".so") != file_name.size() - 3) { exe_km = kms.size(); } kms.push_back(km); datas.push_back(data); } ASSERT(t, exe_km >= 0) << "Can't find exe mapping"; ASSERT(t, kms[0].is_stack()) << "Can't find stack"; TraceTaskEvent tte = read_task_trace_event(t, TraceTaskEvent::EXEC); // The exe name we pass in here will be passed to gdb. Pass the backing file // name if there is one, otherwise pass the original file name (which means // we declined to copy it to the trace file during recording for whatever // reason). const string& exe_name = datas[exe_km].file_name.empty() ? kms[exe_km].fsname() : datas[exe_km].file_name; t->post_exec(&trace_frame.regs(), &trace_frame.extra_regs(), &exe_name); t->post_exec_syscall(tte); { // Tell AutoRemoteSyscalls that we don't need memory parameters. This will // stop it from having trouble if our current stack pointer (the value // from the replay) isn't in the [stack] mapping created for our stub. AutoRemoteSyscalls remote(t, AutoRemoteSyscalls::DISABLE_MEMORY_PARAMS); // Now fix up the address space. First unmap all the mappings other than // our rr page. vector unmaps; for (auto m : t->vm()->maps()) { // Do not attempt to unmap [vsyscall] --- it doesn't work. if (m.map.start() != AddressSpace::rr_page_start() && !m.map.is_vsyscall()) { unmaps.push_back(m.map); } } for (auto& m : unmaps) { remote.infallible_syscall(syscall_number_for_munmap(t->arch()), m.start(), m.size()); t->vm()->unmap(m.start(), m.size()); } // We will have unmapped the stack memory that |remote| would have used for // memory parameters. Fortunately process_mapped_region below doesn't // need any memory parameters for its remote syscalls. // Process the [stack] mapping. restore_mapped_region(remote, kms[0], datas[0]); } const string& recorded_exe_name = kms[exe_km].fsname(); { // Now that [stack] is mapped, reinitialize AutoRemoteSyscalls with // memory parameters enabled. AutoRemoteSyscalls remote(t); // Now map in all the mappings that we recorded from the real exec. for (ssize_t i = 1; i < ssize_t(kms.size()) - 1; ++i) { restore_mapped_region(remote, kms[i], datas[i]); } size_t index = recorded_exe_name.rfind('/'); string name = string("rr:") + recorded_exe_name.substr(index == string::npos ? 0 : index + 1); AutoRestoreMem mem(remote, name.c_str()); remote.infallible_syscall(syscall_number_for_prctl(t->arch()), PR_SET_NAME, mem.get()); } init_scratch_memory(t, kms.back(), datas.back()); // Apply final data records --- fixing up the last page in each data segment // for zeroing applied by the kernel, and applying monkeypatches. t->apply_all_data_records_from_trace(); // Now it's safe to save the auxv data t->vm()->save_auxv(t); } /** * Return true if a FUTEX_LOCK_PI operation on |futex| done by |t| * will transition the futex into the contended state. (This results * in the kernel atomically setting the FUTEX_WAITERS bit on the futex * value.) The new value of the futex after the kernel updates it is * returned in |next_val|. 
*/ static bool is_now_contended_pi_futex(Task* t, remote_ptr futex, int* next_val) { int val = t->read_mem(futex); pid_t owner_tid = (val & FUTEX_TID_MASK); bool now_contended = (owner_tid != 0 && owner_tid != t->rec_tid && !(val & FUTEX_WAITERS)); if (now_contended) { LOG(debug) << t->tid << ": futex " << futex << " is " << val << ", so WAITERS bit will be set"; *next_val = (owner_tid & FUTEX_TID_MASK) | FUTEX_WAITERS; } return now_contended; } static void process_futex(Task* t, const TraceFrame& trace_frame, SyscallEntryOrExit state, ReplayTraceStep* step) { if (state == SYSCALL_EXIT) { return; } const Registers& regs = trace_frame.regs(); int op = (int)regs.arg2_signed() & FUTEX_CMD_MASK; if (FUTEX_LOCK_PI == op) { remote_ptr futex = regs.arg1(); int next_val; if (is_now_contended_pi_futex(t, futex, &next_val)) { // During recording, we waited for the // kernel to update the futex, but // since we emulate SYS_futex in // replay, we need to set it ourselves // here. // XXX this seems wrong. we're setting it while there is still tracee // code to execute before we reach the syscall! t->write_mem(futex, next_val); } } } static void process_brk(Task* t, const TraceFrame& trace_frame, SyscallEntryOrExit state, ReplayTraceStep* step) { if (state == SYSCALL_ENTRY) { return; } TraceReader::MappedData data; KernelMapping km = t->trace_reader().read_mapped_region(&data); // Zero flags means it's an an unmap, or no change. if (km.flags()) { AutoRemoteSyscalls remote(t); ASSERT(t, data.source == TraceReader::SOURCE_ZERO); remote.infallible_mmap_syscall(km.start(), km.size(), km.prot(), MAP_ANONYMOUS | MAP_FIXED | km.flags(), -1, 0); t->vm()->map(km.start(), km.size(), km.prot(), MAP_ANONYMOUS | km.flags(), 0, "[heap]", KernelMapping::NO_DEVICE, KernelMapping::NO_INODE, &km); } else if (km.size() > 0) { AutoRemoteSyscalls remote(t); remote.infallible_syscall(syscall_number_for_munmap(t->arch()), km.start(), km.size()); t->vm()->unmap(km.start(), km.size()); } } /** * Pass NOTE_TASK_MAP to update cached mmap data. If the data * need to be manually updated, pass |DONT_NOTE_TASK_MAP| and update * it manually. */ enum NoteTaskMap { DONT_NOTE_TASK_MAP = 0, NOTE_TASK_MAP }; static remote_ptr finish_anonymous_mmap(AutoRemoteSyscalls& remote, const TraceFrame& trace_frame, size_t length, int prot, int flags, NoteTaskMap note_task_map) { const Registers& rec_regs = trace_frame.regs(); /* *Must* map the segment at the recorded address, regardless of what the recorded tracee passed as the |addr| hint. */ remote_ptr rec_addr = rec_regs.syscall_result(); string file_name; dev_t device = KernelMapping::NO_DEVICE; ino_t inode = KernelMapping::NO_INODE; KernelMapping recorded_km; if (flags & MAP_PRIVATE) { remote.infallible_mmap_syscall(rec_addr, length, prot, // Tell the kernel to take |rec_addr| // seriously. 
(flags & ~MAP_GROWSDOWN) | MAP_FIXED, -1, 0); recorded_km = KernelMapping(rec_addr, rec_addr + ceil_page_size(length), string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE, prot, flags, 0); } else { TraceReader::MappedData data; recorded_km = remote.task()->trace_reader().read_mapped_region(&data); ASSERT(remote.task(), data.source == TraceReader::SOURCE_ZERO); auto emufile = remote.task()->replay_session().emufs().get_or_create( recorded_km, length); struct stat real_file; finish_direct_mmap(remote, rec_addr, length, prot, flags & ~MAP_ANONYMOUS, 0, emufile->proc_path(), 0, real_file, file_name); device = real_file.st_dev; inode = real_file.st_ino; } if (note_task_map) { remote.task()->vm()->map(rec_addr, length, prot, flags, 0, file_name, device, inode, &recorded_km); } return rec_addr; } /* Ensure that accesses to the memory region given by start/length cause a SIGBUS, as for accesses beyond the end of an mmaped file. */ static void create_sigbus_region(AutoRemoteSyscalls& remote, int prot, remote_ptr start, size_t length, const KernelMapping& km) { if (length == 0) { return; } /* Open an empty file in the tracee */ char filename[] = PREFIX_FOR_EMPTY_MMAPED_REGIONS "XXXXXX"; { /* Close our side immediately */ ScopedFd fd(mkstemp(filename)); } int child_fd; { AutoRestoreMem child_str(remote, filename); child_fd = remote.infallible_syscall(syscall_number_for_open(remote.arch()), child_str.get(), O_RDONLY); } /* Unlink it now that the child has opened it */ unlink(filename); struct stat fstat = remote.task()->stat_fd(child_fd); string file_name = remote.task()->file_name_of_fd(child_fd); /* mmap it in the tracee. We need to set the correct 'prot' flags so that the correct signal is generated on a memory access (SEGV if 'prot' doesn't allow the access, BUS if 'prot' does allow the access). */ remote.infallible_mmap_syscall(start, length, prot, MAP_FIXED | MAP_PRIVATE, child_fd, 0); /* Don't leak the tmp fd. The mmap doesn't need the fd to * stay open. */ remote.infallible_syscall(syscall_number_for_close(remote.arch()), child_fd); KernelMapping km_slice = km.subrange(start, start + length); remote.task()->vm()->map(start, length, prot, MAP_FIXED | MAP_PRIVATE, 0, file_name, fstat.st_dev, fstat.st_ino, &km_slice); } static void finish_private_mmap(AutoRemoteSyscalls& remote, const TraceFrame& trace_frame, size_t length, int prot, int flags, off64_t offset_pages, const KernelMapping& km) { LOG(debug) << " finishing private mmap of " << km.fsname(); Task* t = remote.task(); size_t num_bytes = length; remote_ptr mapped_addr = finish_anonymous_mmap(remote, trace_frame, length, prot, /* The restored region won't be backed * by file. */ flags | MAP_ANONYMOUS, DONT_NOTE_TASK_MAP); /* Restore the map region we copied. 
*/ ssize_t data_size = t->set_data_from_trace(); /* Ensure pages past the end of the file fault on access */ size_t data_pages = ceil_page_size(data_size); size_t mapped_pages = ceil_page_size(num_bytes); t->vm()->map(mapped_addr, num_bytes, prot, flags | MAP_ANONYMOUS, page_size() * offset_pages, string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE, &km); create_sigbus_region(remote, prot, mapped_addr + data_pages, mapped_pages - data_pages, km); } static void finish_shared_mmap(AutoRemoteSyscalls& remote, const TraceFrame& trace_frame, int prot, int flags, off64_t offset_pages, size_t file_size, const KernelMapping& km) { Task* t = remote.task(); auto buf = t->trace_reader().read_raw_data(); size_t rec_num_bytes = ceil_page_size(buf.data.size()); // Ensure there's a virtual file for the file that was mapped // during recording. auto emufile = t->replay_session().emufs().get_or_create(km, file_size); // Re-use the direct_map() machinery to map the virtual file. // // NB: the tracee will map the procfs link to our fd; there's // no "real" name for the file anywhere, to ensure that when // we exit/crash the kernel will clean up for us. struct stat real_file; string real_file_name; finish_direct_mmap(remote, buf.addr, rec_num_bytes, prot, flags, offset_pages, emufile->proc_path(), offset_pages, real_file, real_file_name); // Write back the snapshot of the segment that we recorded. // We have to write directly to the underlying file, because // the tracee may have mapped its segment read-only. // // TODO: this is a poor man's shared segment synchronization. // For full generality, we also need to emulate direct file // modifications through write/splice/etc. off64_t offset_bytes = page_size() * offset_pages; if (ssize_t(buf.data.size()) != pwrite64(emufile->fd(), buf.data.data(), buf.data.size(), offset_bytes)) { FATAL() << "Failed to write " << buf.data.size() << " bytes at " << HEX(offset_bytes) << " to " << emufile->real_path() << " for " << emufile->emu_path(); } LOG(debug) << " restored " << buf.data.size() << " bytes at " << HEX(offset_bytes) << " to " << emufile->real_path() << " for " << emufile->emu_path(); t->vm()->map(buf.addr, buf.data.size(), prot, flags, offset_bytes, real_file_name, real_file.st_dev, real_file.st_ino, &km); } static void process_mmap(Task* t, const TraceFrame& trace_frame, SyscallEntryOrExit state, size_t length, int prot, int flags, off64_t offset_pages, ReplayTraceStep* step) { if (SYSCALL_ENTRY == state) { return; } if (trace_frame.regs().syscall_failed()) { return; } step->action = TSTEP_RETIRE; /* Successful mmap calls are much more interesting to process. */ { // Next we hand off actual execution of the mapping to the // appropriate helper. 
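  // In sketch form, the dispatch below is:
  //   MAP_ANONYMOUS                        -> finish_anonymous_mmap()
  //   file-backed, data from a real file   -> finish_direct_mmap()
  //   file-backed, data saved in the trace -> finish_private_mmap() for
  //     MAP_PRIVATE mappings, finish_shared_mmap() otherwise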
AutoRemoteSyscalls remote(t); if (flags & MAP_ANONYMOUS) { finish_anonymous_mmap(remote, trace_frame, length, prot, flags, NOTE_TASK_MAP); } else { TraceReader::MappedData data; KernelMapping km = t->trace_reader().read_mapped_region(&data); if (data.source == TraceReader::SOURCE_FILE) { struct stat real_file; string real_file_name; finish_direct_mmap(remote, trace_frame.regs().syscall_result(), length, prot, flags, offset_pages, data.file_name, data.file_data_offset_bytes / page_size(), real_file, real_file_name); t->vm()->map(km.start(), length, prot, flags, page_size() * offset_pages, real_file_name, real_file.st_dev, real_file.st_ino, &km); } else { ASSERT(t, data.source == TraceReader::SOURCE_TRACE); if (MAP_PRIVATE & flags) { finish_private_mmap(remote, trace_frame, length, prot, flags, offset_pages, km); } else { finish_shared_mmap(remote, trace_frame, prot, flags, offset_pages, data.file_size_bytes, km); } } } // Finally, we finish by emulating the return value. remote.regs().set_syscall_result(trace_frame.regs().syscall_result()); } // Monkeypatcher can emit data records that need to be applied now t->apply_all_data_records_from_trace(); t->validate_regs(); } void process_grow_map(Task* t) { AutoRemoteSyscalls remote(t); TraceReader::MappedData data; KernelMapping km = t->trace_reader().read_mapped_region(&data); ASSERT(t, km.size()); restore_mapped_region(remote, km, data); } static void process_shmat(Task* t, const TraceFrame& trace_frame, SyscallEntryOrExit state, int shmid, int shm_flags, ReplayTraceStep* step) { if (SYSCALL_ENTRY == state) { return; } if (trace_frame.regs().syscall_failed()) { return; } step->action = TSTEP_RETIRE; { AutoRemoteSyscalls remote(t); TraceReader::MappedData data; KernelMapping km = t->trace_reader().read_mapped_region(&data); int prot = shm_flags_to_mmap_prot(shm_flags); int flags = MAP_SHARED; finish_shared_mmap(remote, trace_frame, prot, flags, 0, data.file_size_bytes, km); // Finally, we finish by emulating the return value. remote.regs().set_syscall_result(trace_frame.regs().syscall_result()); } // on x86-32 we have an extra data record that we need to apply --- // the ipc syscall's klugy out-parameter. t->apply_all_data_records_from_trace(); t->validate_regs(); } static void process_shmdt(Task* t, const TraceFrame& trace_frame, SyscallEntryOrExit state, remote_ptr addr, ReplayTraceStep* step) { if (SYSCALL_ENTRY == state) { return; } if (trace_frame.regs().syscall_failed()) { return; } step->action = TSTEP_RETIRE; { AutoRemoteSyscalls remote(t); auto mapping = t->vm()->mapping_of(addr); ASSERT(t, mapping.map.start() == addr); remote.infallible_syscall(syscall_number_for_munmap(t->arch()), addr, mapping.map.end() - addr); remote.regs().set_syscall_result(trace_frame.regs().syscall_result()); } t->validate_regs(); } static void process_init_buffers(Task* t, SyscallEntryOrExit state, ReplayTraceStep* step) { /* This was a phony syscall to begin with. */ if (SYSCALL_ENTRY == state) { return; } step->action = TSTEP_RETIRE; /* Proceed to syscall exit so we can run our own syscalls. */ remote_ptr rec_child_map_addr = t->current_trace_frame().regs().syscall_result(); /* We don't want the desched event fd during replay, because * we already know where they were. (The perf_event fd is * emulated anyway.) 
*/ t->init_buffers(rec_child_map_addr); ASSERT(t, t->syscallbuf_child.cast() == rec_child_map_addr) << "Should have mapped syscallbuf at " << rec_child_map_addr << ", but it's at " << t->syscallbuf_child; t->validate_regs(); } template static void rep_after_enter_syscall_arch(Task* t, int syscallno) { switch (syscallno) { case Arch::exit: t->destroy_buffers(); break; case Arch::write: case Arch::writev: { int fd = (int)t->regs().arg1_signed(); t->fd_table()->will_write(t, fd); break; } } } void rep_after_enter_syscall(Task* t, int syscallno) { RR_ARCH_FUNCTION(rep_after_enter_syscall_arch, t->arch(), t, syscallno) } template static void rep_process_syscall_arch(Task* t, ReplayTraceStep* step) { /* FIXME: don't shadow syscall() */ int syscall = t->current_trace_frame().event().Syscall().number; const TraceFrame& trace_frame = t->replay_session().current_trace_frame(); const Registers& trace_regs = trace_frame.regs(); SyscallEntryOrExit state; switch (trace_frame.event().Syscall().state) { case ENTERING_SYSCALL: state = SYSCALL_ENTRY; break; case EXITING_SYSCALL: state = SYSCALL_EXIT; break; default: ASSERT(t, "Not entering or exiting?"); return; } LOG(debug) << "processing " << t->syscall_name(syscall) << " (" << state_name(state) << ")"; // sigreturns are never restartable, and the value of the // syscall-result register after a sigreturn is not actually the // syscall result --- and may be anything, including one of the values // below. if (SYSCALL_EXIT == state && trace_regs.syscall_may_restart() && !is_sigreturn(syscall, t->arch())) { bool interrupted_restart = (EV_SYSCALL_INTERRUPTION == t->ev().type()); // The tracee was interrupted while attempting to // restart a syscall. We have to look at the previous // event to see which syscall we're trying to restart. if (interrupted_restart) { syscall = t->ev().Syscall().number; LOG(debug) << " interrupted " << t->syscall_name(syscall) << " interrupted again"; } // During recording, when a syscall exits with a // restart "error", the kernel sometimes restarts the // tracee by resetting its $ip to the syscall entry // point, but other times restarts the syscall without // changing the $ip. In the latter case, we have to // leave the syscall return "hanging". If it's // restarted without changing the $ip, we'll skip // advancing to the restart entry below and just // emulate exit by setting the kernel outparams. // // It's probably possible to predict which case is // going to happen (seems to be for // -ERESTART_RESTARTBLOCK and // ptrace-declined-signal-delivery restarts), but it's // simpler and probably more reliable to just check // the tracee $ip at syscall restart to determine // whether syscall re-entry needs to occur. t->apply_all_data_records_from_trace(); t->set_return_value_from_trace(); // Use this record to recognize the syscall if it // indeed restarts. If the syscall isn't restarted, // we'll pop this event eventually, at the point when // the recorder determined that the syscall wasn't // going to be restarted. if (!interrupted_restart) { // For interrupted SYS_restart_syscall's, // reuse the restart record, both because // that's semantically correct, and because // we'll only know how to pop one interruption // event later. 
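      // (The event pushed below is a syscall-interruption event; the
      // restart_syscall handling further down pops it again via
      // pop_syscall_interruption() once the restarted syscall exits.)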
t->push_event(Event(interrupted, SyscallEvent(syscall, t->arch()))); t->ev().Syscall().regs = t->regs(); } step->action = TSTEP_RETIRE; LOG(debug) << " " << t->syscall_name(syscall) << " interrupted by " << trace_regs.syscall_result() << " at " << trace_regs.ip() << ", may restart"; return; } if (Arch::restart_syscall == syscall) { ASSERT(t, EV_SYSCALL_INTERRUPTION == t->ev().type()) << "Must have interrupted syscall to restart"; syscall = t->ev().Syscall().number; if (SYSCALL_ENTRY == state) { remote_code_ptr intr_ip = t->ev().Syscall().regs.ip(); auto cur_ip = t->ip(); LOG(debug) << "'restarting' " << t->syscall_name(syscall) << " interrupted by " << t->ev().Syscall().regs.syscall_result() << " at " << intr_ip << "; now at " << cur_ip; if (cur_ip == intr_ip) { t->emulate_syscall_entry(t->regs()); step->action = TSTEP_RETIRE; return; } } else { t->pop_syscall_interruption(); LOG(debug) << "exiting restarted " << t->syscall_name(syscall); } } step->syscall.number = syscall; step->action = syscall_action(state); /* Manual implementations of irregular syscalls that need to do more during * replay than just modify register and memory state. * Don't let a negative incoming syscall number be treated as a real * system call that we assigned a negative number because it doesn't * exist in this architecture. * All invalid/unsupported syscalls get the default emulation treatment. */ switch (syscall < 0 ? INT32_MAX : syscall) { case Arch::clone: return process_clone(t, trace_frame, state, step, trace_frame.regs().arg1(), syscall); case Arch::vfork: if (state == SYSCALL_EXIT) { Registers r = t->regs(); r.set_original_syscallno(Arch::fork); t->set_regs(r); } return process_clone(t, trace_frame, state, step, 0, Arch::fork); case Arch::fork: return process_clone(t, trace_frame, state, step, 0, syscall); case Arch::execve: return process_execve(t, trace_frame, state, step); case Arch::futex: return process_futex(t, trace_frame, state, step); case Arch::ptrace: if (SYSCALL_ENTRY == state) { return; } switch ((int)trace_frame.regs().arg1_signed()) { case PTRACE_POKETEXT: case PTRACE_POKEDATA: if (!trace_frame.regs().syscall_failed()) { Task* target = t->session().find_task((pid_t)trace_frame.regs().arg2_signed()); ASSERT(t, target); remote_ptr addr = trace_frame.regs().arg3(); typename Arch::unsigned_word data = trace_frame.regs().arg4(); target->write_mem(addr, data); } break; } break; case Arch::brk: return process_brk(t, trace_frame, state, step); case Arch::mmap: { // process_mmap checks 'state' too, but we need to check it now to // avoid reading 'args' prematurely. When state == SYSCALL_ENTRY, // there could be a lot of code to execute before we reach the syscall. 
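      // The mmap_semantics switch below distinguishes the two ABI
      // variants of this syscall: StructArguments (old x86 mmap, where
      // arg1 points at a struct packing the six parameters) and
      // RegisterArguments (x86-64 mmap, where the parameters arrive in
      // registers, as they do for mmap2).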
if (SYSCALL_ENTRY == state) { return; } switch (Arch::mmap_semantics) { case Arch::StructArguments: { auto args = t->read_mem( remote_ptr(trace_regs.arg1())); return process_mmap(t, trace_frame, state, args.len, args.prot, args.flags, args.offset / page_size(), step); } case Arch::RegisterArguments: return process_mmap(t, trace_frame, state, trace_regs.arg2(), trace_regs.arg3(), trace_regs.arg4(), trace_regs.arg6() / page_size(), step); } } case Arch::mmap2: return process_mmap(t, trace_frame, state, trace_regs.arg2(), trace_regs.arg3(), trace_regs.arg4(), trace_regs.arg6(), step); case Arch::shmat: return process_shmat(t, trace_frame, state, trace_regs.arg1(), trace_regs.arg3(), step); case Arch::shmdt: return process_shmdt(t, trace_frame, state, trace_regs.arg1(), step); case Arch::mremap: if (state == SYSCALL_EXIT) { // We must emulate mremap because the kernel's choice for the remap // destination can vary (in particular, when we emulate exec it makes // different decisions). AutoRemoteSyscalls remote(t); if (trace_regs.syscall_result() == trace_regs.arg1()) { // Non-moving mremap. Don't pass MREMAP_FIXED or MREMAP_MAYMOVE // since that triggers EINVAL when the new map overlaps the old map. remote.infallible_syscall_ptr(syscall, trace_regs.arg1(), trace_regs.arg2(), trace_regs.arg3(), 0); } else { // Force the mremap to use the destination address from recording. // XXX could the new mapping overlap the old, with different start // addresses? Hopefully the kernel doesn't do that to us!!! remote.infallible_syscall_ptr( syscall, trace_regs.arg1(), trace_regs.arg2(), trace_regs.arg3(), MREMAP_MAYMOVE | MREMAP_FIXED, trace_regs.syscall_result()); } // Task::on_syscall_exit takes care of updating AddressSpace. } return; case Arch::madvise: switch ((int)t->regs().arg3()) { case MADV_DONTNEED: case MADV_REMOVE: break; default: return; } /* fall through */ case Arch::munmap: case Arch::mprotect: case Arch::arch_prctl: case Arch::set_thread_area: if (state == SYSCALL_EXIT) { // Using AutoRemoteSyscalls here fails for arch_prctl, not sure why. Registers r = t->regs(); r.set_syscallno(t->regs().original_syscallno()); r.set_ip(r.ip().decrement_by_syscall_insn_length(r.arch())); t->set_regs(r); if (syscall == Arch::mprotect) { t->vm()->fixup_mprotect_growsdown_parameters(t); } __ptrace_cont(t, syscall); __ptrace_cont(t, syscall); ASSERT(t, t->regs().syscall_result() == trace_regs.syscall_result()); if (syscall == Arch::mprotect) { Registers r2 = t->regs(); r2.set_arg1(r.arg1()); r2.set_arg2(r.arg2()); r2.set_arg3(r.arg3()); t->set_regs(r2); } } return; case Arch::ipc: switch ((int)trace_regs.arg1_signed()) { case SHMAT: return process_shmat(t, trace_frame, state, trace_regs.arg2(), trace_regs.arg3(), step); case SHMDT: return process_shmdt(t, trace_frame, state, trace_regs.arg5(), step); default: break; } break; case Arch::prctl: if (state == SYSCALL_EXIT) { switch ((int)trace_regs.arg1_signed()) { case PR_SET_NAME: { t->update_prname(trace_regs.arg2()); return; } } } return; case Arch::sigreturn: case Arch::rt_sigreturn: if (state == SYSCALL_EXIT) { t->set_regs(trace_regs); t->set_extra_regs(trace_frame.extra_regs()); step->action = TSTEP_RETIRE; } return; case Arch::write: case Arch::writev: if (state == SYSCALL_EXIT) { /* write*() can be desched'd, but don't use scratch, * so we might have saved 0 bytes of scratch after a * desched. 
*/ maybe_noop_restore_syscallbuf_scratch(t); } return; case SYS_rrcall_init_buffers: return process_init_buffers(t, state, step); case SYS_rrcall_init_preload: if (state == SYSCALL_EXIT) { t->at_preload_init(); } return; case SYS_rrcall_notify_syscall_hook_exit: if (SYSCALL_ENTRY == state) { ASSERT(t, t->syscallbuf_hdr); t->syscallbuf_hdr->notify_on_syscall_hook_exit = true; } return; default: return; } } void rep_process_syscall(Task* t, ReplayTraceStep* step) { // Use the event's arch, not the task's, because the task's arch may // be out of date immediately after an exec. RR_ARCH_FUNCTION(rep_process_syscall_arch, t->current_trace_frame().event().arch(), t, step) } rr-4.1.0/src/replay_syscall.h000066400000000000000000000013411265436462100161350ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_REP_PROCESS_EVENT_H_ #define RR_REP_PROCESS_EVENT_H_ #include "TraceStream.h" class Task; struct ReplayTraceStep; /** * Call this when |t| has just entered a syscall. At this point, data * saved at |rec_before_record_syscall_entry()| can be restored. */ void rep_after_enter_syscall(Task* t, int syscallno); /** * Process pending syscall. Call this when |t| is about to enter or exit * a syscall. */ void rep_process_syscall(Task* t, ReplayTraceStep* step); /** * Process an EV_GROW_MAP event. These are like mmap syscalls, so handled * in replay_syscall. */ void process_grow_map(Task* t); #endif /* RR_REP_PROCESS_EVENT_H_ */ rr-4.1.0/src/script/000077500000000000000000000000001265436462100142435ustar00rootroot00000000000000rr-4.1.0/src/script/checkpoint-visualizer.html000066400000000000000000000053271265436462100214620ustar00rootroot00000000000000
<table>
  <tr><th>Checkpoint</th><th>Time</th><th>Time to next</th></tr>
  <!-- (rest of the visualizer page not recovered) -->
</table>
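<!-- A minimal sketch of how a data row for the table above could be
     appended with plain DOM calls. The row values are made up for
     illustration; the page's original script was not recovered. -->
<script>
  function addCheckpointRow(table, checkpoint, time, timeToNext) {
    var tr = document.createElement("tr");
    [checkpoint, time, timeToNext].forEach(function(value) {
      var td = document.createElement("td");
      td.textContent = value; // e.g. "3", "1.25s", "0.40s"
      tr.appendChild(td);
    });
    table.appendChild(tr);
  }
</script>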
rr-4.1.0/src/script/setup_travis.sh000077500000000000000000000007531265436462100173370ustar00rootroot00000000000000#!/bin/bash # Install the prerequisites needed to build and run tests on travis-ci. echo Configuring travis-ci build slave ... echo The slave is `uname -a` packages=(linux-libc-dev linux-libc-dev:i386 gcc-multilib libc6-dev:i386 rpm g++ lib32stdc++6 gdb zlib1g:i386 zlib1g-dev:i386 python-pexpect) sudo apt-get update && \ sudo apt-get install "${packages[@]}" sudo ln -s /usr/lib32/libstdc++.so.6 /usr/lib32/libstdc++.so && \ echo ... finished configuring slave rr-4.1.0/src/script/tag-release.sh000077500000000000000000000017721265436462100170020ustar00rootroot00000000000000#!/bin/bash function fatal { why=$1; echo "[FATAL]" $why >&2 exit 1 } major=$1 minor=$2 patch=$3 ver="$major.$minor.$patch" echo "Preparing for release '$ver' ..." if [[ $major == "" || $minor == "" || $patch == "" ]]; then fatal "Usage: ./tag-release.sh MAJOR MINOR PATCH" fi verfile=CMakeLists.txt echo "Patching $verfile with new version string ..." sed -i "s/rr_VERSION_MAJOR [0-9][0-9]*/rr_VERSION_MAJOR $major/g" $verfile sed -i "s/rr_VERSION_MINOR [0-9][0-9]*/rr_VERSION_MINOR $minor/g" $verfile sed -i "s/rr_VERSION_PATCH [0-9][0-9]*/rr_VERSION_PATCH $patch/g" $verfile echo "Showing diff for $verfile ..." git diff -p -U8 echo -n "Is this what you expected to see? [Y/n] " read ok if [[ $ok != "Y" ]]; then fatal "Oops. Aborting version update by user request." fi echo "Generating git commit ..." git commit $verfile -m "Bump version to $ver." echo "Generating git tag $ver ..." git tag $ver echo "Done! Publish the new version using 'git push --all' or 'git push; git push --tags'." rr-4.1.0/src/script/update-gh-pages.sh000077500000000000000000000013361265436462100175600ustar00rootroot00000000000000#!/bin/bash function fatal { why=$1; echo "[FATAL]" $why >&2 exit 1 } rev=HEAD if [[ $1 != "" ]]; then rev=$1 fi ver=`git name-rev --name-only --tags $rev` if [[ $ver == undefined ]]; then fatal "No tag found" fi echo "Updating repo ..." git checkout gh-pages || fatal "Failed to checkout gh-pages branch." verfile=index.html echo "Patching $verfile with new version $ver ..." sed -i "s/[^<]*$ver * Authors: * Will Drewry * Kees Cook * * The code may be used by anyone for any purpose, and can serve as a * starting point for developing applications using mode 2 seccomp. */ #ifndef RR_SECCOMP_BPF_H_ #define RR_SECCOMP_BPF_H_ #define _GNU_SOURCE 1 #include #include #include #include #include #include #include #include #include #include #ifndef PR_SET_NO_NEW_PRIVS #define PR_SET_NO_NEW_PRIVS 38 #endif #include #include #include #ifdef HAVE_LINUX_SECCOMP_H #include #endif #ifndef SECCOMP_MODE_FILTER #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. 
*/ #define SECCOMP_RET_KILL 0x00000000U #define SECCOMP_RET_TRAP 0x00030000U #define SECCOMP_RET_ERRNO 0x00050000U #define SECCOMP_RET_TRACE 0x7ff00000U #define SECCOMP_RET_ALLOW 0x7fff0000U #define SECCOMP_RET_ACTION 0x7fff0000U #define SECCOMP_RET_DATA 0x0000ffffU struct seccomp_data { int nr; __u32 arch; __u64 instruction_pointer; __u64 args[6]; }; #endif #define inst_ptr (offsetof(struct seccomp_data, instruction_pointer)) #define ALLOW_SYSCALLS_FROM_CALLSITE(callsite) \ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, inst_ptr), \ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, callsite, 0, 1), ALLOW_PROCESS #define ALLOW_PROCESS BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW) #define TRACE_PROCESS \ BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_TRACE | SECCOMP_RET_DATA) #endif /* RR_SECCOMP_BPF_H_ */ rr-4.1.0/src/syscalls.py000066400000000000000000002057761265436462100151670ustar00rootroot00000000000000class BaseSyscall(object): """A base class for syscalls. The constructor accepts specifications for the x86 and x86-64 syscall numbers; if one of them does not exist, then the associated syscall is assumed to not exist on the corresponding architecture. """ # Take **kwargs and ignore to make life easier on RegularSyscall. def __init__(self, x86=None, x64=None, **kwargs): assert x86 or x64 # Must exist on one architecture. self.x86 = x86 self.x64 = x64 assert len(kwargs) is 0 class RestartSyscall(BaseSyscall): """A special class for the restart_syscall syscall.""" def __init__(self, x86=None, x64=None): BaseSyscall.__init__(self, x86=x86, x64=x64) class UnsupportedSyscall(BaseSyscall): """A syscall that is unsupported by rr. It is useful to expose these syscalls to the system, so that proper names can be displayed in error messages, if nothing else. They also serve as useful documentation. """ def __init__(self, x86=None, x64=None): BaseSyscall.__init__(self, x86=x86, x64=x64) class InvalidSyscall(UnsupportedSyscall): """A syscall that is unsupported by rr and unimplemented by Linux. We distinguish syscalls unimplemented by any version of Linux supported by rr from other UnsupportedSyscalls, to help us track the completeness of rr's syscall support. """ def __init__(self, x86=None, x64=None): UnsupportedSyscall.__init__(self, x86=x86, x64=x64) class RegularSyscall(BaseSyscall): """A syscall for which replay information may be recorded automatically. The arguments required for rr to record may be specified directly through the arg1...arg6 keyword arguments. The values for these arguments determine the size of the associated arguments to the syscall. The only allowed type for a given argument is a Python string, in which case the size of the argument is sizeof(arg). To ensure correct handling for mixed-arch process groups (e.g. a mix of 32 and 64-bit processes), types should be specified using Arch instead of referring directly to the host system types. """ def __init__(self, **kwargs): for a in range(1,6): arg = 'arg' + str(a) if arg in kwargs: self.__setattr__(arg, kwargs[arg]) kwargs.pop(arg) BaseSyscall.__init__(self, **kwargs) class EmulatedSyscall(RegularSyscall): """A wrapper for regular syscalls. """ def __init__(self, **kwargs): RegularSyscall.__init__(self, **kwargs) class IrregularEmulatedSyscall(BaseSyscall): """A wrapper for irregular syscalls. """ def __init__(self, **kwargs): BaseSyscall.__init__(self, **kwargs) # void exit(int status) # # The exit() function causes normal process termination and the value # of status & 0377 is returned to the parent (see wait(2)). 
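# (Before the first real entry, a quick hypothetical illustration of
# the taxonomy above -- this is not an actual rr definition:
#
#   frob = EmulatedSyscall(x86=9999, arg2="typename Arch::stat")
#
# would declare a syscall that exists only on x86 and whose second
# argument is an outparam of sizeof(Arch::stat) bytes, recorded and
# restored automatically. Syscalls whose replay needs custom logic,
# like exit below, are declared IrregularEmulatedSyscall instead.)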
exit = IrregularEmulatedSyscall(x86=1, x64=60) # Obsolete, glibc calls clone() instead. # But Google Breakpad uses it! fork = EmulatedSyscall(x86=2, x64=57) # ssize_t read(int fd, void *buf, size_t count); # # read() attempts to read up to count bytes from file descriptor fd # into the buffer starting at buf. # # CHECKED: (trace->recorded_regs.eax > 0) read = IrregularEmulatedSyscall(x86=3, x64=0) # ssize_t write(int fd, const void *buf, size_t count); # # write() writes up to count bytes to the file referenced by the file # descriptor fd from the buffer starting at buf. POSIX requires that # a read() which can be proved to occur after a write() has returned # returns the new data. Note that not all file systems are POSIX # conforming. # # Note: write isn't irregular per se; we hook it to redirect output # to stdout/stderr during replay. write = IrregularEmulatedSyscall(x86=4, x64=1) # int open(const char *pathname, int flags) # int open(const char *pathname, int flags, mode_t mode) # # Given a pathname for a file, open() returns a file descriptor, a # small, nonnegative integer for use in subsequent system calls # (read(2), write(2), lseek(2), fcntl(2), etc.). The file descriptor # returned by a successful call will be the lowest-numbered file # descriptor not currently open for the process. open = EmulatedSyscall(x86=5, x64=2) # int close(int fd) # # close() closes a file descriptor, so that it no longer refers to # any file and may be reused. Any record locks (see fcntl(2)) held # on the file it was associated with, and owned by the process, are # removed (regardless of the file descriptor that was used to obtain # the lock). close = IrregularEmulatedSyscall(x86=6, x64=3) # pid_t waitpid(pid_t pid, int *status, int options); # # The waitpid() system call suspends execution of the calling process # until a child specified by pid argument has changed state. By # default, waitpid() waits only for terminated children, but this # behavior is modifiable via the options argument, as described # below.... waitpid = IrregularEmulatedSyscall(x86=7) # int creat(const char *pathname, mode_t mode); # # creat() is equivalent to open() with flags equal to # O_CREAT|O_WRONLY|O_TRUNC. creat = EmulatedSyscall(x86=8, x64=85) # int link(const char *oldpath, const char *newpath); # # link() creates a new link (also known as a hard link) to an # existing file. link = EmulatedSyscall(x86=9, x64=86) # int unlink(const char *path); # # The unlink() function shall remove a link to a file. If path names # a symbolic link, unlink() shall remove the symbolic link named by # path and shall not affect any file or directory named by the # contents of the symbolic link. Otherwise, unlink() shall remove the # link named by the pathname pointed to by path and shall decrement # the link count of the file referenced by the link. unlink = EmulatedSyscall(x86=10, x64=87) # int execve(const char *filename, char *const argv[], char *const envp[]); # # execve() executes the program pointed to by filename. execve = IrregularEmulatedSyscall(x86=11, x64=59) # int chdir(const char *path); # # chdir() changes the current working directory of the calling # process to the directory specified in path. chdir = EmulatedSyscall(x86=12, x64=80) # time_t time(time_t *t); # # time() returns the time since the Epoch (00:00:00 UTC, January 1, # 1970), measured in seconds. If t is non-NULL, the return value is # also stored in the memory pointed to by t. 
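# (The arg1 annotation on the definition below uses the mechanism
# described in RegularSyscall's docstring: the kernel may write a
# time_t back through arg1, so rr records sizeof(Arch::time_t) bytes
# at that address and restores them during replay.)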
time = EmulatedSyscall(x86=13, x64=201, arg1="typename Arch::time_t") mknod = EmulatedSyscall(x86=14, x64=133) # int chmod(const char *path, mode_t mode) # # The mode of the file given by path or referenced by fildes is # changed. chmod = EmulatedSyscall(x86=15, x64=90) lchown = EmulatedSyscall(x86=16, x64=94) _break = InvalidSyscall(x86=17) oldstat = UnsupportedSyscall(x86=18) # off_t lseek(int fd, off_t offset, int whence) # # The lseek() function repositions the offset of the open file # associated with the file descriptor fd to the argument offset # according to the directive whence as follows: lseek = EmulatedSyscall(x86=19, x64=8) # pid_t getpid(void); # # getpid() returns the process ID of the calling process. (This is # often used by routines that generate unique temporary # filenames.) getpid = EmulatedSyscall(x86=20, x64=39) mount = UnsupportedSyscall(x86=21, x64=165) umount = UnsupportedSyscall(x86=22) setuid = EmulatedSyscall(x86=23, x64=105) getuid = EmulatedSyscall(x86=24, x64=102) stime = UnsupportedSyscall(x86=25) # long ptrace(enum __ptrace_request request, pid_t pid, # void *addr, void *data); # # The ptrace() system call provides a means by which one process (the # "tracer") may observe and control the execution of another process # (the "tracee"), and examine and change the tracee's memory and # registers. It is primarily used to implement breakpoint debugging # and system call tracing. ptrace = IrregularEmulatedSyscall(x86=26, x64=101) # unsigned int alarm(unsigned int seconds) # # The alarm() system call schedules an alarm. The process will get a # SIGALRM after the requested amount of seconds. alarm = EmulatedSyscall(x86=27, x64=37) oldfstat = UnsupportedSyscall(x86=28) # int pause(void); # # pause() causes the calling process (or thread) to sleep until a # signal is delivered that either terminates the process or causes # the invocation of a signal-catching function. pause = IrregularEmulatedSyscall(x86=29, x64=34) # int utime(const char *filename, const struct utimbuf *times) # # The utime() system call changes the access and modification times # of the inode specified by filename to the actime and modtime fields # of times respectively. # # If times is NULL, then the access and modification times of the # file are set to the current time. # # Changing timestamps is permitted when: either the process has # appropriate privileges, or the effective user ID equals the user ID # of the file, or times is NULL and the process has write permission # for the file. # # FIXME: is mod_time set by the kernel? utime = EmulatedSyscall(x86=30, x64=132) stty = InvalidSyscall(x86=31) gtty = InvalidSyscall(x86=32) # int access(const char *pathname, int mode); # # access() checks whether the calling process can access the file # pathname. If pathname is a symbolic link, it is dereferenced. access = EmulatedSyscall(x86=33, x64=21) nice = UnsupportedSyscall(x86=34) ftime = InvalidSyscall(x86=35) sync = EmulatedSyscall(x86=36, x64=162) # int kill(pid_t pid, int sig) # # The kill() system call can be used to send any signal to any # process group or process. kill = EmulatedSyscall(x86=37, x64=62) # int rename(const char *oldpath, const char *newpath) # # rename() renames a file, moving it between directories if required. rename = EmulatedSyscall(x86=38, x64=82) # int mkdir(const char *pathname, mode_t mode); # # mkdir() attempts to create a directory named pathname. 
mkdir = EmulatedSyscall(x86=39, x64=83) # int rmdir(const char *pathname) # # rmdir() deletes a directory, which must be empty. rmdir = EmulatedSyscall(x86=40, x64=84) # int dup(int oldfd) # # dup() uses the lowest-numbered unused descriptor for the new # descriptor. dup = EmulatedSyscall(x86=41, x64=32) # int pipe(int pipefd[2]); # # pipe() creates a pipe, a unidirectional data channel that can be # used for interprocess communication. The array pipefd is used to # return two file descriptors referring to the ends of the pipe. # pipefd[0] refers to the read end of the pipe. pipefd[1] refers to # the write end of the pipe. Data written to the write end of the # pipe is buffered by the kernel until it is read from the read end # of the pipe. For further details, see pipe(7). pipe = EmulatedSyscall(x86=42, x64=22, arg1="int[2]") # clock_t times(struct tms *buf) # # times() stores the current process times in the struct tms that buf # points to. The struct tms is as defined in : times = EmulatedSyscall(x86=43, x64=100, arg1="typename Arch::tms") prof = InvalidSyscall(x86=44) # int brk(void *addr) # # brk() and sbrk() change the location of the program break, which # defines the end of the process's data segment (i.e., theprogram # break is the first location after the end of the uninitialized data # segment). Increasing the program break has the effect of # allocating memory to the process; decreasing the break deallocates # memory. # # brk() sets the end of the data segment to the value specified by # addr, when that value is reasonable, the system has enough memory, # and the process does not exceed its maximum data size (see # setrlimit(2)). brk = IrregularEmulatedSyscall(x86=45, x64=12) # int setgid(gid_t gid) # # setgid() sets the effective group ID of the calling process. # If the caller is the superuser, the real GID and saved set-group-ID # are also set. # # Under Linux, setgid() is implemented like the POSIX version with the # _POSIX_SAVED_IDS feature. This allows a set-group-ID program that # is not set-user-ID-root to drop all of its group privileges, do some # un-privileged work, and then reengage the original effective group # ID in a secure manner. # # setgid will return 0 on success, or if the process already runs # under the given gid. setgid = EmulatedSyscall(x86=46, x64=106) getgid = EmulatedSyscall(x86=47, x64=104) signal = UnsupportedSyscall(x86=48) geteuid = EmulatedSyscall(x86=49, x64=107) getegid = EmulatedSyscall(x86=50, x64=108) acct = UnsupportedSyscall(x86=51, x64=163) umount2 = UnsupportedSyscall(x86=52, x64=166) lock = InvalidSyscall(x86=53) # int ioctl(int d, int request, ...) # # The ioctl() function manipulates the underlying device parameters # of special files. In particular, many operating characteristics of # character special files (e.g., terminals) may be controlled with # ioctl() requests. The argument d must be an open file descriptor. # ioctl = IrregularEmulatedSyscall(x86=54, x64=16) fcntl = IrregularEmulatedSyscall(x86=55, x64=72) mpx = InvalidSyscall(x86=56) # int setpgid(pid_t pid, pid_t pgid); # # setpgid() sets the PGID of the process specified by pid to pgid. # If pid is zero, then the process ID of the calling process is used. # If pgid is zero, then the PGID of the process specified by pid is # made the same as its process ID. If setpgid() is used to move a # process from one process group to another (as is done by some # shells when creating pipelines), both process groups must be part # of the same session (see setsid(2) and credentials(7)). 
In this # case, the pgid specifies an existing process group to be joined and # the session ID of that group must match the session ID of the # joining process. setpgid = EmulatedSyscall(x86=57, x64=109) ulimit = InvalidSyscall(x86=58) oldolduname = UnsupportedSyscall(x86=59) # mode_t umask(mode_t mask); # # umask() sets the calling process's file mode creation mask (umask) # to mask & 0777 (i.e., only the file permission bits of mask are # used), and returns the previous value of the mask. umask = EmulatedSyscall(x86=60, x64=95) chroot = EmulatedSyscall(x86=61, x64=161) ustat = UnsupportedSyscall(x86=62, x64=136) # int dup2(int oldfd, int newfd) # # dup2() makes newfd be the copy of oldfd, closing newfd first if # necessary, but note the following.. dup2 = IrregularEmulatedSyscall(x86=63, x64=33) # pid_t getppid(void); # # getppid() returns the process ID of the parent of the calling # process. getppid = EmulatedSyscall(x86=64, x64=110) # pid_t getpgrp(void) # # The POSIX.1 getpgrp() always returns the PGID of the caller. getpgrp = EmulatedSyscall(x86=65, x64=111) # pid_t setsid(void) # # setsid() is used to start a new session and set the new process # group ID. setsid = EmulatedSyscall(x86=66, x64=112) # int sigaction(int signum, const struct sigaction *act, struct sigaction #*oldact); # # The sigaction() system call is used to change the action taken by a # process on receipt of a specific signal. (See signal(7) for an # overview of signals.) # # signum specifies the signal and can be any valid signal except # SIGKILL and SIGSTOP. # # If act is non-NULL, the new action for signal signum is installed # from act. If oldact is non-NULL, the previous action is saved in # oldact. sigaction = EmulatedSyscall(x86=67, arg3="typename Arch::kernel_sigaction") sgetmask = UnsupportedSyscall(x86=68) ssetmask = UnsupportedSyscall(x86=69) setreuid = UnsupportedSyscall(x86=70, x64=113) setregid = UnsupportedSyscall(x86=71, x64=114) sigsuspend = IrregularEmulatedSyscall(x86=72) sigpending = UnsupportedSyscall(x86=73) sethostname = UnsupportedSyscall(x86=74, x64=170) # int setrlimit(int resource, const struct rlimit *rlim) # # getrlimit() and setrlimit() get and set resource limits # respectively. Each resource has an associated soft and hard limit, # as defined by the rlimit structure (the rlim argument to both # getrlimit() and setrlimit()): # # NOTE: This syscall is emulated so the limit does not apply during # replay. Any signals triggered due to exceeded limits are emulated # by other means. setrlimit = EmulatedSyscall(x86=75, x64=160) getrlimit = EmulatedSyscall(x64=97, arg2="typename Arch::rlimit") # int getrusage(int who, struct rusage *usage) # # getrusage() returns resource usage measures for who, which can be # one of the following.. getrusage = EmulatedSyscall(x86=77, x64=98, arg2="typename Arch::rusage") # int gettimeofday(struct timeval *tv, struct timezone *tz); # # The functions gettimeofday() and settimeofday() can get and set the # time as well as a timezone. The tv argument is a struct timeval # (as specified in ): gettimeofday = EmulatedSyscall(x86=78, x64=96, arg1="typename Arch::timeval", arg2="typename Arch::timezone") settimeofday = UnsupportedSyscall(x86=79, x64=164) getgroups = IrregularEmulatedSyscall(x86=80, x64=115) setgroups = EmulatedSyscall(x86=81, x64=116) select = IrregularEmulatedSyscall(x86=82, x64=23) # int symlink(const char *oldpath, const char *newpath) # # symlink() creates a symbolic link named newpath which contains the # string oldpath. 
symlink = EmulatedSyscall(x86=83, x64=88) oldlstat = UnsupportedSyscall(x86=84) # ssize_t readlink(const char *path, char *buf, size_t bufsiz); # # readlink() places the contents of the symbolic link path in the # buffer buf, which has size bufsiz. readlink() does not append a # null byte to buf. It will truncate the contents (to a length of # bufsiz characters), in case the buffer is too small to hold all of # the contents. readlink = IrregularEmulatedSyscall(x86=85, x64=89) uselib = UnsupportedSyscall(x86=86, x64=134) swapon = UnsupportedSyscall(x86=87, x64=167) reboot = UnsupportedSyscall(x86=88, x64=169) readdir = UnsupportedSyscall(x86=89) # void *mmap2(void *addr, size_t length, int prot,int flags, int fd, off_t #pgoffset); # # The mmap2() system call operates in exactly the same way as # mmap(2), except that the final argument specifies the offset into # the file in 4096-byte units (instead of bytes, as is done by # mmap(2)). This enables applications that use a 32-bit off_t to map # large files (up to 2^44 bytes). mmap = IrregularEmulatedSyscall(x86=90, x64=9) # int munmap(void *addr, size_t length) # # The munmap() system call deletes the mappings for the specified # address range, and causes further references to addresses within # the range to generate invalid memory references. The region is # also automatically unmapped when the process is terminated. On the # other hand, closing the file descriptor does not unmap the region. munmap = IrregularEmulatedSyscall(x86=91, x64=11) # int truncate(const char *path, off_t length); # int ftruncate(int fd, off_t length) # # The truncate() and ftruncate() functions cause the regular file # named by path or referenced by fd to be truncated to a size of # precisely length bytes. truncate = EmulatedSyscall(x86=92, x64=76) ftruncate = EmulatedSyscall(x86=93, x64=77) # int fchmod(int fd, mode_t mode); # # fchmod() changes the permissions of the file referred to by the # open file descriptor fd fchmod = EmulatedSyscall(x86=94, x64=91) fchown = EmulatedSyscall(x86=95, x64=93) # int getpriority(int which, int who); # # The scheduling priority of the process, process group, or user, as # indicated by which and who is obtained with the getpriority() call. getpriority = EmulatedSyscall(x86=96, x64=140) # int setpriority(int which, int who, int prio); # # The scheduling priority of the process, process group, or user, as # indicated by which and who is obtained with the getpriority() call # and set with the setpriority() call. setpriority = IrregularEmulatedSyscall(x86=97, x64=141) profil = InvalidSyscall(x86=98) # int statfs(const char *path, struct statfs *buf) # # The function statfs() returns information about a mounted file # system. path is the pathname of any file within the mounted file # system. buf is a pointer to a statfs structure defined # approximately as follows: statfs = EmulatedSyscall(x86=99, x64=137, arg2="typename Arch::statfs") # int fstatfs(int fd, struct statfs *buf) # # The function statfs() returns information about a mounted file # system. path is the pathname of any file within the # get_time(GET_TID(thread_id));mounted file system. buf is a pointer # to a statfs structure defined approximately as follows: fstatfs = EmulatedSyscall(x86=100, x64=138, arg2="typename Arch::statfs") ioperm = UnsupportedSyscall(x86=101, x64=173) # int socketcall(int call, unsigned long *args) # # socketcall() is a common kernel entry point for the socket system # calls. call determines which socket function to invoke. 
args # points to a block containing the actual arguments, which are passed # through to the appropriate call. socketcall = IrregularEmulatedSyscall(x86=102) syslog = UnsupportedSyscall(x86=103, x64=103) # int setitimer(int which, const struct itimerval *new_value, struct itimerval #*old_value); # # The function setitimer() sets the specified timer to the value in # new_value. If old_value is non-NULL, the old value of the timer is # stored there. setitimer = EmulatedSyscall(x86=104, x64=38, arg3="typename Arch::itimerval") getitimer = EmulatedSyscall(x86=105, x64=36, arg2="typename Arch::itimerval") stat = EmulatedSyscall(x86=106, x64=4, arg2="typename Arch::stat") lstat = EmulatedSyscall(x86=107, x64=6, arg2="typename Arch::stat") fstat = EmulatedSyscall(x86=108, x64=5, arg2="typename Arch::stat") olduname = UnsupportedSyscall(x86=109) iopl = UnsupportedSyscall(x86=110, x64=172) vhangup = UnsupportedSyscall(x86=111, x64=153) idle = UnsupportedSyscall(x86=112) vm86old = UnsupportedSyscall(x86=113) # pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); # # The wait3() and wait4() system calls are similar to waitpid(2), but # additionally return resource usage information about the child in # the structure pointed to by rusage. wait4 = IrregularEmulatedSyscall(x86=114, x64=61) swapoff = UnsupportedSyscall(x86=115, x64=168) # int sysinfo(struct sysinfo *info) # # sysinfo() provides a simple way of getting overall system # statistics. sysinfo = EmulatedSyscall(x86=116, x64=99, arg1="typename Arch::sysinfo") # int ipc(unsigned int call, int first, int second, int third, void *ptr, long #fifth); # # ipc() is a common kernel entry point for the System V IPC calls for # messages, semaphores, and shared memory. call determines which IPC # function to invoke; the other arguments are passed through to the # appropriate call. ipc = IrregularEmulatedSyscall(x86=117) # int fsync(int fd) # # fsync() transfers ("flushes") all modified in-core data of (i.e., # modified buffer cache pages for) the file referred to by the file # descriptor fd to the disk device (or other permanent storage # device) where that file resides. The call blocks until the device # reports that the transfer has completed. It also flushes metadata # information associated with the file (see stat(2)). fsync = EmulatedSyscall(x86=118, x64=74) # int sigreturn(unsigned long __unused) # # When the Linux kernel creates the stack frame for a signal handler, # a call to sigreturn() is inserted into the stack frame so that upon # return from the signal handler, sigreturn() will be called. sigreturn = EmulatedSyscall(x86=119) # int clone(int (*fn)(void *), void *child_stack, int flags, void *arg, (pid_t #*ptid, struct user_desc *tls, pid_t *ctid)); # # clone() creates a new process, in a manner similar to fork(2). It # is actually a library function layered on top of the # underlying clone() system call, hereinafter referred to as # sys_clone. A description of sys_clone is given towards the end of # this page. # # NOTE: clone is actually implemented by sys_clone which has the # following signature: # # long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user #*parent_tid, void __user *child_tid, struct pt_regs *regs) clone = IrregularEmulatedSyscall(x86=120, x64=56) setdomainname = UnsupportedSyscall(x86=121, x64=171) # int uname(struct utsname *buf) # # uname() returns system information in the structure pointed to by # buf.
The utsname struct is defined in <sys/utsname.h>: uname = EmulatedSyscall(x86=122, x64=63, arg1="typename Arch::utsname") modify_ldt = UnsupportedSyscall(x86=123, x64=154) adjtimex = UnsupportedSyscall(x86=124, x64=159) # int mprotect(const void *addr, size_t len, int prot) # # mprotect() changes protection for the calling process's memory # page(s) containing any part of the address range in the interval # [addr, addr+len-1]. addr must be aligned to a page boundary. # # If the calling process tries to access memory in a manner that # violates the protection, then the kernel generates a SIGSEGV signal # for the process. mprotect = IrregularEmulatedSyscall(x86=125, x64=10) # int sigprocmask(int how, const sigset_t *set, sigset_t *oldset); # # sigprocmask() is used to fetch and/or change the signal mask of the # calling thread. The signal mask is the set of signals whose # delivery is currently blocked for the caller (see also signal(7) # for more details). sigprocmask = IrregularEmulatedSyscall(x86=126) create_module = UnsupportedSyscall(x86=127, x64=174) init_module = UnsupportedSyscall(x86=128, x64=175) delete_module = UnsupportedSyscall(x86=129, x64=176) get_kernel_syms = UnsupportedSyscall(x86=130, x64=177) # int quotactl(int cmd, const char *special, int id, caddr_t addr); # # The quotactl() call manipulates disk quotas. The cmd argument # indicates a command to be applied to the user or group ID # specified in id. To initialize the cmd argument, use the # QCMD(subcmd, type) macro. The type value is either USRQUOTA, for # user quotas, or GRPQUOTA, for group quotas. The subcmd value is # described below. quotactl = IrregularEmulatedSyscall(x86=131, x64=179) # pid_t getpgid(pid_t pid); # # getpgid() returns the PGID of the process specified by pid. If pid # is zero, the process ID of the calling process is used. getpgid = EmulatedSyscall(x86=132, x64=121) # int fchdir(int fd); # # fchdir() is identical to chdir(); the only difference is that the # directory is given as an open file descriptor. fchdir = EmulatedSyscall(x86=133, x64=81) bdflush = UnsupportedSyscall(x86=134) sysfs = UnsupportedSyscall(x86=135, x64=139) personality = IrregularEmulatedSyscall(x86=136, x64=135) afs_syscall = InvalidSyscall(x86=137, x64=183) setfsuid = UnsupportedSyscall(x86=138, x64=122) setfsgid = UnsupportedSyscall(x86=139, x64=123) # int _llseek(unsigned int fd, unsigned long offset_high, unsigned long #offset_low, loff_t *result, unsigned int whence); # # The _llseek() function repositions the offset of the open file # associated with the file descriptor fd to (offset_high<<32) | # offset_low bytes relative to the beginning of the file, the current # position in the file, or the end of the file, depending on whether # whence is SEEK_SET, SEEK_CUR, or SEEK_END, respectively. It # returns the resulting file position in the argument result. _llseek = EmulatedSyscall(x86=140, arg4="typename Arch::__kernel_loff_t") # int getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int #count); # # The system call getdents() reads several linux_dirent structures # from the directory referred to by the open file descriptor fd into # the buffer pointed to by dirp. The argument count specifies the # size of that buffer.
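#
# A hedged usage sketch (illustrative C, not part of rr; getdents has
# no glibc wrapper, so it is typically invoked through syscall(2)):
#
#   char buf[1024];
#   int fd = open(".", O_RDONLY | O_DIRECTORY);
#   long n = syscall(SYS_getdents, fd, buf, sizeof(buf));
#   /* buf now holds n bytes of packed struct linux_dirent records;
#      each record's d_reclen field gives the offset of the next. */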
getdents = IrregularEmulatedSyscall(x86=141, x64=78) # int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, #struct timeval *timeout); # # select() and pselect() allow a program to monitor multiple file # descriptors, waiting until one or more of the file descriptors # become "ready" for some class of I/O operation (e.g., input # possible). A file descriptor is considered ready if it is possible # to perform the corresponding I/O operation (e.g., read(2)) without # blocking. _newselect = IrregularEmulatedSyscall(x86=142) flock = EmulatedSyscall(x86=143, x64=73) # int msync(void *addr, size_t length, int flags); # # msync() flushes changes made to the in-core copy of a file that was # mapped into memory using mmap(2) back to disk. Without use of this # call there is no guarantee that changes are written back before # munmap(2) is called. To be more precise, the part of the file that # corresponds to the memory area starting at addr and having length # length is updated. msync = EmulatedSyscall(x86=144, x64=26) # ssize_t readv(int fd, const struct iovec *iov, int iovcnt); # # The readv() system call reads iovcnt buffers from the file associated # with the file descriptor fd into the buffers described by iov ("scatter # input"). readv = IrregularEmulatedSyscall(x86=145, x64=19) # ssize_t writev(int fd, const struct iovec *iov, int iovcnt) # # The writev() function writes iovcnt buffers of data described by # iov to the file associated with the file descriptor fd ("gather # output"). writev = IrregularEmulatedSyscall(x86=146, x64=20) # pid_t getsid(pid_t pid); # # getsid(0) returns the session ID of the calling process. getsid(p) # returns the session ID of the process with process ID p. (The session # ID of a process is the process group ID of the session leader.) getsid = EmulatedSyscall(x86=147, x64=124) # int fdatasync(int fd) # # fdatasync() is similar to fsync(), but does not flush modified # metadata unless that metadata is needed in order to allow a # subsequent data retrieval to be correctly handled. For example, # changes to st_atime or st_mtime (respectively, time of last access # and time of last modification; see stat(2)) do not require flushing # because they are not necessary for a subsequent data read to be # handled correctly. On the other hand, a change to the file size # (st_size, as made by say ftruncate(2)), would require a metadata # flush. fdatasync = EmulatedSyscall(x86=148, x64=75) # int _sysctl(struct __syscall_args* args); # # The _sysctl() call reads and/or writes kernel parameters. For example, # the hostname, or the maximum number of open files. # # Often not supported in modern kernels, so can return ENOSYS. _sysctl = IrregularEmulatedSyscall(x86=149, x64=156) mlock = EmulatedSyscall(x86=150, x64=149) munlock = EmulatedSyscall(x86=151, x64=150) mlockall = EmulatedSyscall(x86=152, x64=151) munlockall = EmulatedSyscall(x86=153, x64=152) sched_setparam = EmulatedSyscall(x86=154, x64=142) # int sched_getparam(pid_t pid, struct sched_param *param) # # sched_getparam() retrieves the scheduling parameters for the # process identified by pid. If pid is zero, then the parameters of # the calling process are retrieved. sched_getparam = EmulatedSyscall(x86=155, x64=143, arg2="typename Arch::sched_param") # int sched_setscheduler(pid_t pid, int policy, const struct sched_param #*param); # # sched_setscheduler() sets both the scheduling policy and the # associated parameters for the process whose ID is specified in pid.
# If pid equals zero, the scheduling policy and parameters of the # calling process will be set. The interpretation of the argument # param depends on the selected policy. sched_setscheduler = EmulatedSyscall(x86=156, x64=144) # int sched_getscheduler(pid_t pid); # # sched_getscheduler() queries the scheduling policy currently # applied to the process identified by pid. If pid equals zero, the # policy of the calling process will be retrieved. sched_getscheduler = EmulatedSyscall(x86=157, x64=145) # int sched_yield(void) # # sched_yield() causes the calling thread to relinquish the CPU. The # thread is moved to the end of the queue for its static priority and # a new thread gets to run. sched_yield = IrregularEmulatedSyscall(x86=158, x64=24) # int sched_get_priority_max(int policy) # # sched_get_priority_max() returns the maximum priority value that # can be used with the scheduling algorithm identified by policy. sched_get_priority_max = EmulatedSyscall(x86=159, x64=146) # int sched_get_priority_min(int policy) # # sched_get_priority_min() returns the minimum priority value that # can be used with the scheduling algorithm identified by policy. sched_get_priority_min = EmulatedSyscall(x86=160, x64=147) sched_rr_get_interval = UnsupportedSyscall(x86=161, x64=148) # int nanosleep(const struct timespec *req, struct timespec *rem) # # nanosleep() suspends the execution of the calling thread until # either at least the time specified in *req has elapsed, or the # delivery of a signal that triggers the invocation of a handler in # the calling thread or that terminates the process. # # CHECKED: trace->recorded_regs.ecx != NULL nanosleep = IrregularEmulatedSyscall(x86=162, x64=35) # void *mremap(void *old_address, size_t old_size, size_t new_size, int flags, #... ( void *new_address )); # # mremap() expands (or shrinks) an existing memory mapping, # potentially moving it at the same time (controlled by the flags # argument and the available virtual address space). mremap = EmulatedSyscall(x86=163, x64=25) # int setresuid(uid_t ruid, uid_t euid, uid_t suid); # # setresuid() sets the real user ID, the effective user ID, and the # saved set-user-ID of the calling process. setresuid = EmulatedSyscall(x86=164, x64=117) getresuid = EmulatedSyscall(x86=165, x64=118, arg1="typename Arch::legacy_uid_t", arg2="typename Arch::legacy_uid_t", arg3="typename Arch::legacy_uid_t") vm86 = UnsupportedSyscall(x86=166) query_module = UnsupportedSyscall(x86=167, x64=178) # int poll(struct pollfd *fds, nfds_t nfds, int timeout) # int ppoll(struct pollfd *fds, nfds_t nfds, # const struct timespec *timeout_ts, # const sigset_t *sigmask); # # poll() performs a similar task to select(2): it waits for one of a # set of file descriptors to become ready to perform I/O. # # The relationship between poll() and ppoll() is analogous to the # relationship between select(2) and pselect(2): like pselect(2), # ppoll() allows an application to safely wait until either a file # descriptor becomes ready or until a signal is caught. # # XXX is this irregular? CHECKED: (trace->recorded_regs.eax > 0) poll = IrregularEmulatedSyscall(x86=168, x64=7) nfsservctl = UnsupportedSyscall(x86=169, x64=180) # int setresgid(gid_t rgid, gid_t egid, gid_t sgid); # # setresgid() sets the real GID, effective GID, and saved # set-group-ID of the calling process.
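#
# A common privilege-dropping sketch (illustrative C, not part of rr;
# uid and gid are hypothetical target IDs). Group IDs must be changed
# before user IDs, since a process that has already dropped its user
# privileges may no longer be allowed to change its groups:
#
#   if (setresgid(gid, gid, gid) != 0 || setresuid(uid, uid, uid) != 0)
#     abort();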
setresgid = EmulatedSyscall(x86=170, x64=119) getresgid = EmulatedSyscall(x86=171, x64=120, arg1="typename Arch::legacy_gid_t", arg2="typename Arch::legacy_gid_t", arg3="typename Arch::legacy_gid_t") # int prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long #arg4, unsigned long arg5); # # prctl() is called with a first argument describing what to do (with # values defined in <linux/prctl.h>), and further arguments with a # significance depending on the first one. # prctl = IrregularEmulatedSyscall(x86=172, x64=157) rt_sigreturn = EmulatedSyscall(x86=173, x64=15) rt_sigaction = EmulatedSyscall(x86=174, x64=13, arg3="typename Arch::kernel_sigaction") rt_sigprocmask = IrregularEmulatedSyscall(x86=175, x64=14) # int sigpending(sigset_t *set); # # sigpending() returns the set of signals that are pending for # delivery to the calling thread (i.e., the signals which have been # raised while blocked). The mask of pending signals is returned in # set. rt_sigpending = IrregularEmulatedSyscall(x86=176, x64=127) # int sigtimedwait(const sigset_t *set, siginfo_t *info, # const struct timespec *timeout); # # sigwaitinfo() suspends execution of the calling thread until one of # the signals in set is pending (If one of the signals in set is # already pending for the calling thread, sigwaitinfo() will return # immediately.) # # sigtimedwait() operates in exactly the same way as sigwaitinfo() # except that it has an additional argument, timeout, which specifies # a minimum interval for which the thread is suspended waiting for a # signal. rt_sigtimedwait = IrregularEmulatedSyscall(x86=177, x64=128) # int sigsuspend(const sigset_t *mask); # # sigsuspend() temporarily replaces the signal mask of the calling # process with the mask given by mask and then suspends the process # until delivery of a signal whose action is to invoke a signal # handler or to terminate a process. rt_sigsuspend = IrregularEmulatedSyscall(x86=179, x64=130) # ssize_t pread(int fd, void *buf, size_t count, off_t offset); # # pread, pwrite - read from or write to a file descriptor at a given # offset pread64 = IrregularEmulatedSyscall(x86=180, x64=17) pwrite64 = EmulatedSyscall(x86=181, x64=18) chown = EmulatedSyscall(x86=182, x64=92) # char *getwd(char *buf); # # These functions return a null-terminated string containing an # absolute pathname that is the current working directory of the # calling process. The pathname is returned as the function result # and via the argument buf, if present. getcwd = IrregularEmulatedSyscall(x86=183, x64=79) capget = IrregularEmulatedSyscall(x86=184, x64=125) capset = EmulatedSyscall(x86=185, x64=126) # int sigaltstack(const stack_t *ss, stack_t *oss) # # sigaltstack() allows a process to define a new alternate signal # stack and/or retrieve the state of an existing alternate signal # stack. An alternate signal stack is used during the execution of a # signal handler if the establishment of that handler (see # sigaction(2)) requested it. sigaltstack = EmulatedSyscall(x86=186, x64=131, arg2="typename Arch::stack_t") sendfile = IrregularEmulatedSyscall(x86=187, x64=40) getpmsg = InvalidSyscall(x86=188, x64=181) putpmsg = InvalidSyscall(x86=189, x64=182) vfork = IrregularEmulatedSyscall(x86=190, x64=58) # int getrlimit(int resource, struct rlimit *rlim) # # getrlimit() and setrlimit() get and set resource limits # respectively.
Each resource has an associated soft and hard limit, # as defined by the rlimit structure (the rlim argument to both # getrlimit() and setrlimit()): ugetrlimit = EmulatedSyscall(x86=191, arg2="typename Arch::rlimit") mmap2 = IrregularEmulatedSyscall(x86=192) truncate64 = EmulatedSyscall(x86=193) ftruncate64 = EmulatedSyscall(x86=194) # int stat(const char *path, struct stat *buf); # # stat() stats the file pointed to by path and fills in buf. stat64 = EmulatedSyscall(x86=195, arg2="typename Arch::stat64") # int lstat(const char *path, struct stat *buf); # # lstat() is identical to stat(), except that if path is a symbolic # link, then the link itself is stat-ed, not the file that it refers # to. lstat64 = EmulatedSyscall(x86=196, arg2="typename Arch::stat64") # int fstat(int fd, struct stat *buf) # # fstat() is identical to stat(), except that the file to be stat-ed # is specified by the file descriptor fd. fstat64 = EmulatedSyscall(x86=197, arg2="typename Arch::stat64") lchown32 = EmulatedSyscall(x86=198) # uid_t getuid(void); # # getuid() returns the real user ID of the calling process. getuid32 = EmulatedSyscall(x86=199) # gid_t getgid(void); # # getgid() returns the real group ID of the calling process. getgid32 = EmulatedSyscall(x86=200) # uid_t geteuid(void); # # geteuid() returns the effective user ID of the calling process. geteuid32 = EmulatedSyscall(x86=201) # gid_t getegid(void); # # getegid() returns the effective group ID of the calling process. getegid32 = EmulatedSyscall(x86=202) setreuid32 = UnsupportedSyscall(x86=203) # int setregid(gid_t rgid, gid_t egid) # # setregid() sets the real and effective group IDs of the calling # process. setregid32 = EmulatedSyscall(x86=204) # int getgroups(int size, gid_t list[]); # # getgroups() returns the supplementary group IDs of the calling # process in list. The argument size should be set to the maximum # number of items that can be stored in the buffer pointed to by # list. If the calling process is a member of more than size # supplementary groups, then an error results. It is unspecified # whether the effective group ID of the calling process is included # in the returned list. (Thus, an application should also call # getegid(2) and add or remove the resulting value.) # # If size is zero, list is not modified, but the total number of # supplementary group IDs for the process is returned. This allows # the caller to determine the size of a dynamically allocated list to # be used in a further call to getgroups(). getgroups32 = IrregularEmulatedSyscall(x86=205) setgroups32 = EmulatedSyscall(x86=206) fchown32 = EmulatedSyscall(x86=207) # int setresuid32(uid_t ruid, uid_t euid, uid_t suid); # # setresuid() sets the real user ID, the effective user ID, and the # saved set-user-ID of the calling process. setresuid32 = EmulatedSyscall(x86=208) # int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid) # # getresuid() returns the real UID, the effective UID, and the saved # set-user-ID of the calling process, in the arguments ruid, euid, # and suid, respectively. getresgid() performs the analogous task # for the process's group IDs. getresuid32 = EmulatedSyscall(x86=209, arg1="typename Arch::uid_t", arg2="typename Arch::uid_t", arg3="typename Arch::uid_t") # int setresgid32(gid_t rgid, gid_t egid, gid_t sgid); # # setresgid() sets the real GID, effective GID, and saved # set-group-ID of the calling process.
setresgid32 = EmulatedSyscall(x86=210) # int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); # # getresgid() returns the real GID, the effective GID, and the saved # set-group-ID of the calling process, in the arguments rgid, egid, # and sgid, respectively. @return: On success, zero is # returned. On error, -1 is returned, and errno is set # appropriately. getresgid32 = EmulatedSyscall(x86=211, arg1="typename Arch::gid_t", arg2="typename Arch::gid_t", arg3="typename Arch::gid_t") chown32 = EmulatedSyscall(x86=212) setuid32 = EmulatedSyscall(x86=213) setgid32 = EmulatedSyscall(x86=214) setfsuid32 = UnsupportedSyscall(x86=215) setfsgid32 = UnsupportedSyscall(x86=216) pivot_root = UnsupportedSyscall(x86=217, x64=155) mincore = IrregularEmulatedSyscall(x86=218, x64=27) # int madvise(void *addr, size_t length, int advice); # # The madvise() system call advises the kernel about how to handle # paging input/output in the address range beginning at address addr # and with size length bytes. It allows an application to tell the # kernel how it expects to use some mapped or shared memory areas, so # that the kernel can choose appropriate read-ahead and caching # techniques. # The man page says "This call does not influence the semantics of the # application (except in the case of MADV_DONTNEED)", but that is a lie. madvise = IrregularEmulatedSyscall(x86=219, x64=28) getdents64 = IrregularEmulatedSyscall(x86=220, x64=217) # int fcntl(int fd, int cmd, ... ( arg )); # # fcntl() performs one of the operations described below on the open # file descriptor fd. The operation is determined by cmd. fcntl() # can take an optional third argument. Whether or not this argument # is required is determined by cmd. The required argument type is # indicated in parentheses after each cmd name (in most cases, the # required type is long, and we identify the argument using the name # arg), or void is specified if the argument is not required. fcntl64 = IrregularEmulatedSyscall(x86=221) # pid_t gettid(void); # # gettid() returns the caller's thread ID (TID). gettid = EmulatedSyscall(x86=224, x64=186) # ssize_t readahead(int fd, off64_t offset, size_t count); # # readahead() populates the page cache with data from a file so that # subsequent reads from that file will not block on disk I/O. The fd # argument is a file descriptor identifying the file which is to be # read. The offset argument specifies the starting point from # which data is to be read and count specifies the number of bytes to # be read. I/O is performed in whole pages, so that offset is # effectively rounded down to a page boundary and bytes are read up # to the next page boundary greater than or equal to (offset+count). # readahead() does not read beyond the end of the file. readahead() # blocks until the specified data has been read. The current file # offset of the open file referred to by fd is left unchanged. readahead = EmulatedSyscall(x86=225, x64=187) setxattr = EmulatedSyscall(x86=226, x64=188) lsetxattr = EmulatedSyscall(x86=227, x64=189) fsetxattr = EmulatedSyscall(x86=228, x64=190) # ssize_t getxattr(const char *path, const char *name, # void *value, size_t size); # ssize_t lgetxattr(const char *path, const char *name, # void *value, size_t size); # ssize_t fgetxattr(int fd, const char *name, # void *value, size_t size); # # getxattr() retrieves the value of the extended attribute identified # by name and associated with the given path in the file system.
The # length of the attribute value is returned. getxattr = IrregularEmulatedSyscall(x86=229, x64=191) lgetxattr = IrregularEmulatedSyscall(x86=230, x64=192) fgetxattr = IrregularEmulatedSyscall(x86=231, x64=193) listxattr = UnsupportedSyscall(x86=232, x64=194) llistxattr = UnsupportedSyscall(x86=233, x64=195) flistxattr = UnsupportedSyscall(x86=234, x64=196) removexattr = EmulatedSyscall(x86=235, x64=197) lremovexattr = EmulatedSyscall(x86=236, x64=198) fremovexattr = EmulatedSyscall(x86=237, x64=199) tkill = UnsupportedSyscall(x86=238, x64=200) # ssize_t sendfile64 (int __out_fd, int __in_fd, __off64_t *__offset, size_t #__count); # # Send up to COUNT bytes from file associated with IN_FD starting at # *OFFSET to descriptor OUT_FD. Set *OFFSET to the IN_FD's file position # following the read bytes. If OFFSET is a null pointer, use the normal # file position instead. Return the number of written bytes, or -1 in # case of error. sendfile64 = IrregularEmulatedSyscall(x86=239) # int futex(int *uaddr, int op, int val, const struct timespec *timeout, int #*uaddr2, int val3); # # The futex() system call provides a method for a program to wait for # a value at a given address to change, and a method to wake up # anyone waiting on a particular address (while the addresses for the # same memory in separate processes may not be equal, the kernel # maps them internally so the same memory mapped in different # locations will correspond for futex() calls). This system call is # typically used to implement the contended case of a lock in shared # memory, as described in futex(7). futex = IrregularEmulatedSyscall(x86=240, x64=202) # int sched_setaffinity(pid_t pid, size_t cpusetsize, cpu_set_t *mask); # # sched_setaffinity() sets the CPU affinity mask of the process whose # ID is pid to the value specified by mask. If pid is zero, then the # calling process is used. The argument cpusetsize is the length # (in bytes) of the data pointed to by mask. Normally this argument # would be specified as sizeof(cpu_set_t). sched_setaffinity = IrregularEmulatedSyscall(x86=241, x64=203) # int sched_getaffinity(pid_t pid, size_t cpusetsize, cpu_set_t *mask); # # sched_getaffinity() writes the affinity mask of the process whose # ID is pid into the cpu_set_t structure pointed to by mask. The # cpusetsize argument specifies the size (in bytes) of mask. If pid # is zero, then the mask of the calling process is returned. sched_getaffinity = IrregularEmulatedSyscall(x86=242, x64=204) # int set_thread_area(struct user_desc *u_info) # # set_thread_area() sets an entry in the current thread's Thread Local # Storage (TLS) array. The TLS array entry set by set_thread_area() # corresponds to the value of u_info->entry_number passed in by the # user. If this value is in bounds, set_thread_area() copies the TLS # descriptor pointed to by u_info into the thread's TLS array. # # When set_thread_area() is passed an entry_number of -1, it uses a free # TLS entry. If set_thread_area() finds a free TLS entry, the value of # u_info->entry_number is set upon return to show which entry was # changed.
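#
# A hedged sketch of the free-slot convention described above
# (illustrative C; set_thread_area has no glibc wrapper, and tls_block
# is a hypothetical memory block):
#
#   struct user_desc desc = { 0 };
#   desc.entry_number = -1;  /* ask the kernel to pick a free slot */
#   desc.base_addr = (unsigned long)tls_block;
#   desc.limit = 0xfffff;
#   desc.seg_32bit = 1;
#   if (syscall(SYS_set_thread_area, &desc) == 0)
#     /* desc.entry_number now names the slot that was assigned */;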
set_thread_area = IrregularEmulatedSyscall(x86=243, x64=205) get_thread_area = UnsupportedSyscall(x86=244, x64=211) io_setup = UnsupportedSyscall(x86=245, x64=206) io_destroy = UnsupportedSyscall(x86=246, x64=207) io_getevents = UnsupportedSyscall(x86=247, x64=208) io_submit = UnsupportedSyscall(x86=248, x64=209) io_cancel = UnsupportedSyscall(x86=249, x64=210) # int posix_fadvise(int fd, off_t offset, off_t len, int advice); # # Programs can use posix_fadvise() to announce an intention to access # file data in a specific pattern in the future, thus allowing the # kernel to perform appropriate optimizations. fadvise64 = EmulatedSyscall(x86=250, x64=221) # void exit_group(int status) # # This system call is equivalent to exit(2) except that it terminates # not only the calling thread, but all threads in the calling # process's thread group. exit_group = IrregularEmulatedSyscall(x86=252, x64=231) lookup_dcookie = UnsupportedSyscall(x86=253, x64=212) # int epoll_create(int size); # # epoll_create() creates an epoll "instance", requesting the kernel # to allocate an event backing store dimensioned for size # descriptors. The size is not the maximum size of the backing store # but just a hint to the kernel about how to dimension internal # structures. When no longer required, the file descriptor returned # by epoll_create() should be closed by using close(2). epoll_create = EmulatedSyscall(x86=254, x64=213) # int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event) # # This system call performs control operations on the epoll instance # referred to by the file descriptor epfd. It requests that the # operation op be performed for the target file descriptor, fd. epoll_ctl = EmulatedSyscall(x86=255, x64=233) # int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int #timeout); # # The epoll_wait() system call waits for events on the epoll instance # referred to by the file descriptor epfd. The memory area pointed # to by events will contain the events that will be available for the # caller. Up to maxevents are returned by epoll_wait(). The # maxevents argument must be greater than zero. # # XXX is this irregular? CHECKED: (trace->recorded_regs.eax >= 0) epoll_wait = IrregularEmulatedSyscall(x86=256, x64=232) remap_file_pages = UnsupportedSyscall(x86=257, x64=216) # long set_tid_address(int *tidptr); # # The kernel keeps for each process two values called set_child_tid # and clear_child_tid that are NULL by default. # # If a process is started using clone(2) with the CLONE_CHILD_SETTID # flag, set_child_tid is set to child_tidptr, the fifth argument of # that system call. # # When set_child_tid is set, the very first thing the new process # does is writing its PID at this address. set_tid_address = EmulatedSyscall(x86=258, x64=218) timer_create = EmulatedSyscall(x86=259, x64=222, arg3="typename Arch::__kernel_timer_t") timer_settime = EmulatedSyscall(x86=260, x64=223, arg4="typename Arch::itimerspec") timer_gettime = EmulatedSyscall(x86=261, x64=224, arg2="typename Arch::itimerspec") timer_getoverrun = EmulatedSyscall(x86=262, x64=225) timer_delete = EmulatedSyscall(x86=263, x64=226) clock_settime = UnsupportedSyscall(x86=264, x64=227) # int clock_gettime(clockid_t clk_id, struct timespec *tp); # # The functions clock_gettime() and clock_settime() retrieve and set # the time of the specified clock clk_id. 
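#
# A minimal usage sketch (illustrative C, not part of rr):
#
#   struct timespec ts;
#   if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
#     printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);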
clock_gettime = EmulatedSyscall(x86=265, x64=228, arg2="typename Arch::timespec") # int clock_getres(clockid_t clk_id, struct timespec *res) # # The function clock_getres() finds the resolution (precision) of the # specified clock clk_id, and, if res is non-NULL, stores it in the # struct timespec pointed to by res. The resolution of clocks # depends on the implementation and cannot be configured by a # particular process. If the time value pointed to by the argument # tp of clock_settime() is not a multiple of res, then it is # truncated to a multiple of res. clock_getres = EmulatedSyscall(x86=266, x64=229, arg2="typename Arch::timespec") clock_nanosleep = UnsupportedSyscall(x86=267, x64=230) # int statfs(const char *path, struct statfs *buf) # # The function statfs() returns information about a mounted file # system. path is the pathname of any file within the mounted file # system. buf is a pointer to a statfs structure defined # approximately as follows... # # FIXME: we use arg3() here, although according to man pages this system # call has only 2 parameters. However, strace tells another story... statfs64 = EmulatedSyscall(x86=268, arg3="typename Arch::statfs64") fstatfs64 = EmulatedSyscall(x86=269, arg3="typename Arch::statfs64") # int tgkill(int tgid, int tid, int sig) # # tgkill() sends the signal sig to the thread with the thread ID tid # in the thread group tgid. (By contrast, kill(2) can only be used # to send a signal to a process (i.e., thread group) as a whole, and # the signal will be delivered to an arbitrary thread within that # process.) tgkill = EmulatedSyscall(x86=270, x64=234) # int utimes(const char *filename, const struct timeval times[2]) # # The utimes() system call changes the access and modification times # of the inode specified by filename: the access time is set to # times[0] and the modification time to times[1]. # utimes = EmulatedSyscall(x86=271, x64=235) fadvise64_64 = EmulatedSyscall(x86=272) vserver = InvalidSyscall(x86=273, x64=236) mbind = EmulatedSyscall(x86=274, x64=237) get_mempolicy = UnsupportedSyscall(x86=275, x64=239) set_mempolicy = UnsupportedSyscall(x86=276, x64=238) mq_open = UnsupportedSyscall(x86=277, x64=240) mq_unlink = UnsupportedSyscall(x86=278, x64=241) mq_timedsend = UnsupportedSyscall(x86=279, x64=242) mq_timedreceive = UnsupportedSyscall(x86=280, x64=243) mq_notify = UnsupportedSyscall(x86=281, x64=244) mq_getsetattr = UnsupportedSyscall(x86=282, x64=245) kexec_load = UnsupportedSyscall(x86=283, x64=246) # int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); # # If WNOHANG was specified in options and there were no children in a # waitable state, then waitid() returns 0 immediately and the state # of the siginfo_t structure pointed to by infop is unspecified. To # distinguish this case from that where a child was in a waitable # state, zero out the si_pid field before the call and check for a # nonzero value in this field after the call returns. waitid = IrregularEmulatedSyscall(x86=284, x64=247) add_key = UnsupportedSyscall(x86=286, x64=248) request_key = UnsupportedSyscall(x86=287, x64=249) keyctl = UnsupportedSyscall(x86=288, x64=250) ioprio_set = UnsupportedSyscall(x86=289, x64=251) ioprio_get = UnsupportedSyscall(x86=290, x64=252) # int inotify_init(void) # # inotify_init() initializes a new inotify instance and returns a # file descriptor associated with a new inotify event queue.
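#
# A hedged sketch tying the three inotify entries below together
# (illustrative C, not part of rr; the watched path is made up):
#
#   int fd = inotify_init();
#   int wd = inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE);
#   /* read(fd, ...) now returns struct inotify_event records */
#   inotify_rm_watch(fd, wd);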
inotify_init = EmulatedSyscall(x86=291, x64=253) # int inotify_add_watch(int fd, const char *pathname, uint32_t mask) # # inotify_add_watch() adds a new watch, or modifies an existing # watch, for the file whose location is specified in pathname; the # caller must have read permission for this file. The fd argument is # a file descriptor referring to the inotify instance whose watch # list is to be modified. The events to be monitored for pathname # are specified in the mask bit-mask argument. See inotify(7) for a # description of the bits that can be set in mask. inotify_add_watch = EmulatedSyscall(x86=292, x64=254) # int inotify_rm_watch(int fd, uint32_t wd) # # inotify_rm_watch() removes the watch associated with the watch # descriptor wd from the inotify instance associated with the file # descriptor fd. inotify_rm_watch = EmulatedSyscall(x86=293, x64=255) migrate_pages = UnsupportedSyscall(x86=294, x64=256) # int openat(int dirfd, const char *pathname, int flags); # int openat(int dirfd, const char *pathname, int flags, mode_t mode); # # The openat() system call operates in exactly the same way as # open(2), except for the differences described in this manual page. openat = EmulatedSyscall(x86=295, x64=257) # int mkdirat(int dirfd, const char *pathname, mode_t mode); # # The mkdirat() system call operates in exactly the same way as # mkdir(2), except for the differences described in this manual # page.... mkdirat = EmulatedSyscall(x86=296, x64=258) mknodat = UnsupportedSyscall(x86=297, x64=259) fchownat = UnsupportedSyscall(x86=298, x64=260) futimesat = UnsupportedSyscall(x86=299, x64=261) # int fstatat(int dirfd, const char *pathname, struct stat *buf, int flags); # # The fstatat() system call operates in exactly the same way as # stat(2), except for the differences described in this manual # page.... fstatat64 = EmulatedSyscall(x86=300, x64=262, arg3="typename Arch::stat64") # int unlinkat(int dirfd, const char *pathname, int flags) # # The unlinkat() system call operates in exactly the same way as # either unlink(2) or rmdir(2) (depending on whether or not flags # includes the AT_REMOVEDIR flag) except for the differences # described in this manual page. unlinkat = EmulatedSyscall(x86=301, x64=263) renameat = UnsupportedSyscall(x86=302, x64=264) linkat = UnsupportedSyscall(x86=303, x64=265) symlinkat = UnsupportedSyscall(x86=304, x64=266) readlinkat = IrregularEmulatedSyscall(x86=305, x64=267) fchmodat = UnsupportedSyscall(x86=306, x64=268) # int faccessat(int dirfd, const char *pathname, int mode, int flags) # # The faccessat() system call operates in exactly the same way as # access(2), except for the differences described in this manual # page.... faccessat = EmulatedSyscall(x86=307, x64=269) pselect6 = IrregularEmulatedSyscall(x86=308, x64=270) ppoll = IrregularEmulatedSyscall(x86=309, x64=271) unshare = IrregularEmulatedSyscall(x86=310, x64=272) # long set_robust_list(struct robust_list_head *head, size_t len) # # The robust futex implementation needs to maintain per-thread lists # of robust futexes which are unlocked when the thread exits. These # lists are managed in user space; the kernel is only notified about # the location of the head of the list. # # set_robust_list sets the head of the list of robust futexes owned # by the current thread to head. len is the size of *head.
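#
# A hedged sketch of how a threading library might register its
# (empty) list (illustrative C, not part of rr; there is no glibc
# wrapper, and the kernel walks the structure at thread exit):
#
#   static struct robust_list_head head = { { &head.list }, 0, NULL };
#   syscall(SYS_set_robust_list, &head, sizeof(head));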
set_robust_list = EmulatedSyscall(x86=311, x64=273) get_robust_list = EmulatedSyscall(x86=312, x64=274, arg2="typename Arch::unsigned_word", arg3="typename Arch::size_t") # ssize_t splice(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, #size_t len, unsigned int flags); # # splice() moves data between two file descriptors without copying # between kernel address space and user address space. It transfers # up to len bytes of data from the file descriptor fd_in to the file # descriptor fd_out, where one of the descriptors must refer to a # pipe. # # NB: the documentation doesn't mention it explicitly, but the |off| # params are actually inout params, and are updated with the new file # offset on return. # # NOTE: Technically, the following implementation is unsound for # programs that splice with stdin/stdout/stderr and have output # redirected during replay. But, *crickets*. splice = IrregularEmulatedSyscall(x86=313, x64=275) sync_file_range = UnsupportedSyscall(x86=314, x64=277) tee = UnsupportedSyscall(x86=315, x64=276) vmsplice = UnsupportedSyscall(x86=316, x64=278) move_pages = UnsupportedSyscall(x86=317, x64=279) getcpu = UnsupportedSyscall(x86=318, x64=309) epoll_pwait = UnsupportedSyscall(x86=319, x64=281) # int utimensat(int dirfd, const char *pathname, const struct timespec #times[2], int flags); # # utimensat() and futimens() update the timestamps of a file with # nanosecond precision. This contrasts with the historical utime(2) # and utimes(2), which permit only second and microsecond precision, # respectively, when setting file timestamps. utimensat = EmulatedSyscall(x86=320, x64=280) # int signalfd(int fd, const sigset_t *mask, int flags); # There are two underlying Linux system calls: signalfd() and the more # recent signalfd4(). The former system call does not implement a flags # argument. The latter system call implements the flags values described # above. Starting with glibc 2.9, the signalfd() wrapper function will # use signalfd4() where it is available. signalfd = EmulatedSyscall(x86=321, x64=282) # int timerfd_create(int clockid, int flags); # # timerfd_create() creates a new timer object, and returns a file # descriptor that refers to that timer. timerfd_create = EmulatedSyscall(x86=322, x64=283) eventfd = UnsupportedSyscall(x86=323, x64=284) # int fallocate(int fd, int mode, off_t offset, off_t len); # # fallocate() allows the caller to directly manipulate the allocated # disk space for the file referred to by fd for the byte range # starting at offset and continuing for len bytes fallocate = EmulatedSyscall(x86=324, x64=285) # int timerfd_settime(int fd, int flags, # const struct itimerspec *new_value, # struct itimerspec *old_value); # # timerfd_settime() arms (starts) or disarms (stops) the timer # referred to by the file descriptor fd. timerfd_settime = EmulatedSyscall(x86=325, x64=286, arg4="typename Arch::itimerspec") # int timerfd_gettime(int fd, struct itimerspec *curr_value); # # timerfd_gettime() returns, in curr_value, an itimerspec structure # that contains the current setting of the timer referred to by the # file descriptor fd. timerfd_gettime = EmulatedSyscall(x86=326, x64=287, arg2="typename Arch::itimerspec") # int signalfd(int fd, const sigset_t *mask, int flags); # There are two underlying Linux system calls: signalfd() and the more # recent signalfd4(). The former system call does not implement a flags # argument. The latter system call implements the flags values described # above. 
Starting with glibc 2.9, the signalfd() wrapper function will # use signalfd4() where it is available. signalfd4 = EmulatedSyscall(x86=327, x64=289) # int eventfd(unsigned int initval, int flags); # # eventfd() creates an "eventfd object" that can be used as an event # wait/notify mechanism by userspace applications, and by the kernel # to notify userspace applications of events. The object contains an # unsigned 64-bit integer (uint64_t) counter that is maintained by # the kernel. This counter is initialized with the value specified # in the argument initval. eventfd2 = EmulatedSyscall(x86=328, x64=290) # int epoll_create1(int flags); # # epoll_create1() is very similar to epoll_create(); if flags is 0, # the two calls are identical. The flags argument can be used to set # the close-on-exec flag on the new file descriptor. epoll_create1 = EmulatedSyscall(x86=329, x64=291) dup3 = IrregularEmulatedSyscall(x86=330, x64=292) # int pipe2(int pipefd[2], int flags) # # If flags is 0, then pipe2() is the same as pipe(). The following # values can be bitwise ORed in flags to obtain different behavior... pipe2 = EmulatedSyscall(x86=331, x64=293, arg1="int[2]") inotify_init1 = EmulatedSyscall(x86=332, x64=294) preadv = IrregularEmulatedSyscall(x86=333, x64=295) pwritev = EmulatedSyscall(x86=334, x64=296) # int rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *uinfo); # int rt_tgsigqueueinfo(pid_t tgid, pid_t tid, int sig, # siginfo_t *uinfo); # # The rt_sigqueueinfo() and rt_tgsigqueueinfo() system calls are the # low-level interfaces used to send a signal plus data to a process # or thread. The receiver of the signal can obtain the accompanying # data by establishing a signal handler with the sigaction(2) # SA_SIGINFO flag. rt_sigqueueinfo = EmulatedSyscall(x86=178, x64=129) rt_tgsigqueueinfo = EmulatedSyscall(x86=335, x64=297) # int perf_event_open(struct perf_event_attr *attr, # pid_t pid, int cpu, int group_fd, # unsigned long flags); # # Given a list of parameters, perf_event_open() returns a file # descriptor, for use in subsequent system calls (read(2), mmap(2), # prctl(2), fcntl(2), etc.). perf_event_open = EmulatedSyscall(x86=336, x64=298) # int recvmmsg(int sockfd, struct mmsghdr *msgvec, # unsigned int vlen, unsigned int flags, # struct timespec *timeout); # # The recvmmsg() system call is an extension of recvmsg(2) that # allows the caller to receive multiple messages from a socket using # a single system call. (This has performance benefits for some # applications.) A further extension over recvmsg(2) is support for # a timeout on the receive operation. recvmmsg = IrregularEmulatedSyscall(x86=337, x64=299) fanotify_init = UnsupportedSyscall(x86=338, x64=300) fanotify_mark = UnsupportedSyscall(x86=339, x64=301) # int prlimit(pid_t pid, int resource, const struct rlimit *new_limit, struct #rlimit *old_limit); # # The Linux-specific prlimit() system call combines and extends the # functionality of setrlimit() and getrlimit(). It can be used to # both set and get the resource limits of an arbitrary process. # # NOTE: We should execute this system call, since this system call # can set a limit on the stack size that will trigger a synchronous SIGSEGV, # and we expect synchronous SIGSEGVs to be triggered by the kernel # during replay.
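#
# A minimal usage sketch (illustrative C, not part of rr; the glibc
# prlimit() wrapper requires _GNU_SOURCE):
#
#   struct rlimit lim = { 8 * 1024 * 1024, 8 * 1024 * 1024 };
#   if (prlimit(0, RLIMIT_STACK, &lim, NULL) != 0)  /* 0 == this process */
#     perror("prlimit");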
prlimit64 = EmulatedSyscall(x86=340, x64=302, arg4="typename Arch::rlimit64") name_to_handle_at = UnsupportedSyscall(x86=341, x64=303) open_by_handle_at = UnsupportedSyscall(x86=342, x64=304) clock_adjtime = UnsupportedSyscall(x86=343, x64=305) syncfs = EmulatedSyscall(x86=344, x64=306) # int sendmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, # unsigned int flags); # # The sendmmsg() system call is an extension of sendmsg(2) that # allows the caller to transmit multiple messages on a socket using a # single system call. (This has performance benefits for some # applications.) sendmmsg = IrregularEmulatedSyscall(x86=345, x64=307) setns = UnsupportedSyscall(x86=346, x64=308) process_vm_readv = UnsupportedSyscall(x86=347, x64=310) process_vm_writev = UnsupportedSyscall(x86=348, x64=311) kcmp = UnsupportedSyscall(x86=349, x64=312) finit_module = UnsupportedSyscall(x86=350, x64=313) sched_setattr = UnsupportedSyscall(x86=351, x64=314) sched_getattr = UnsupportedSyscall(x86=352, x64=315) renameat2 = UnsupportedSyscall(x86=353, x64=316) seccomp = IrregularEmulatedSyscall(x86=354, x64=317) getrandom = IrregularEmulatedSyscall(x86=355, x64=318) memfd_create = EmulatedSyscall(x86=356, x64=319) # restart_syscall is a little special. restart_syscall = RestartSyscall(x86=0, x64=219) rrcall_init_preload = IrregularEmulatedSyscall(x86=442, x64=442) rrcall_init_buffers = IrregularEmulatedSyscall(x86=443, x64=443) rrcall_notify_syscall_hook_exit = IrregularEmulatedSyscall(x86=444, x64=444) # These syscalls are subsumed under socketcall on x86. socket = EmulatedSyscall(x64=41) connect = EmulatedSyscall(x64=42) accept = IrregularEmulatedSyscall(x64=43) sendto = EmulatedSyscall(x64=44) recvfrom = IrregularEmulatedSyscall(x64=45) sendmsg = IrregularEmulatedSyscall(x64=46) recvmsg = IrregularEmulatedSyscall(x64=47) shutdown = EmulatedSyscall(x64=48) bind = EmulatedSyscall(x64=49) listen = EmulatedSyscall(x64=50) getsockname = IrregularEmulatedSyscall(x64=51) getpeername = IrregularEmulatedSyscall(x64=52) socketpair = EmulatedSyscall(x64=53, arg4="int[2]") setsockopt = EmulatedSyscall(x64=54) getsockopt = IrregularEmulatedSyscall(x64=55) accept4 = IrregularEmulatedSyscall(x64=288) # These syscalls are subsumed under ipc on x86. shmget = EmulatedSyscall(x64=29) shmat = IrregularEmulatedSyscall(x64=30) shmctl = IrregularEmulatedSyscall(x64=31) semget = EmulatedSyscall(x64=64) semop = IrregularEmulatedSyscall(x64=65) semctl = IrregularEmulatedSyscall(x64=66) shmdt = IrregularEmulatedSyscall(x64=67) msgget = EmulatedSyscall(x64=68) msgsnd = IrregularEmulatedSyscall(x64=69) msgrcv = IrregularEmulatedSyscall(x64=70) msgctl = IrregularEmulatedSyscall(x64=71) semtimedop = IrregularEmulatedSyscall(x64=220) # These syscalls simply don't exist on x86. 
arch_prctl = IrregularEmulatedSyscall(x64=158) tuxcall = InvalidSyscall(x64=184) security = InvalidSyscall(x64=185) epoll_ctl_old = UnsupportedSyscall(x64=214) epoll_wait_old = UnsupportedSyscall(x64=215) def _syscalls(): for name, obj in globals().iteritems(): if isinstance(obj, BaseSyscall): yield name, obj def all(): return list(_syscalls()) def for_arch(arch): for name, obj in all(): if getattr(obj, arch) is not None: yield name, obj rr-4.1.0/src/task.cc000066400000000000000000003313531265436462100142170ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "Task" #include "task.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "preload/preload_interface.h" #include "AutoRemoteSyscalls.h" #include "CPUIDBugDetector.h" #include "kernel_abi.h" #include "kernel_metadata.h" #include "kernel_supplement.h" #include "log.h" #include "MagicSaveDataMonitor.h" #include "PreserveFileMonitor.h" #include "RecordSession.h" #include "record_signal.h" #include "ReplaySession.h" #include "ScopedFd.h" #include "seccomp-bpf.h" #include "StdioMonitor.h" #include "StringVectorToCharArray.h" #include "util.h" static const unsigned int NUM_X86_DEBUG_REGS = 8; static const unsigned int NUM_X86_WATCHPOINTS = 4; using namespace rr; using namespace std; /** * Stores the table of signal dispositions and metadata for an * arbitrary set of tasks. Each of those tasks must own one of * the |refcount|s while they still refer to this. */ struct Sighandler { Sighandler() : resethand(false) {} template <typename Arch> void init_arch(const typename Arch::kernel_sigaction& ksa) { k_sa_handler = ksa.k_sa_handler; sa.resize(sizeof(ksa)); memcpy(sa.data(), &ksa, sizeof(ksa)); resethand = (ksa.sa_flags & SA_RESETHAND) != 0; takes_siginfo = (ksa.sa_flags & SA_SIGINFO) != 0; } template <typename Arch> void reset_arch() { typename Arch::kernel_sigaction ksa; memset(&ksa, 0, sizeof(ksa)); static_assert((uintptr_t)SIG_DFL == 0, ""); init_arch<Arch>(ksa); } bool ignored(int sig) const { if (sig == SIGSTOP || sig == SIGKILL) { // These can never be ignored return false; } return (uintptr_t)SIG_IGN == k_sa_handler.as_int() || ((uintptr_t)SIG_DFL == k_sa_handler.as_int() && IGNORE == default_action(sig)); } bool is_default() const { return (uintptr_t)SIG_DFL == k_sa_handler.as_int() && !resethand; } bool is_user_handler() const { static_assert(1 == (uintptr_t)SIG_IGN, ""); return k_sa_handler.as_int() & ~(uintptr_t)SIG_IGN; } remote_code_ptr get_user_handler() const { return is_user_handler() ? remote_code_ptr(k_sa_handler.as_int()) : remote_code_ptr(); } remote_ptr<void> k_sa_handler; // Saved kernel_sigaction; used to restore handler vector<uint8_t> sa; bool resethand; bool takes_siginfo; }; static void reset_handler(Sighandler* handler, SupportedArch arch) { RR_ARCH_FUNCTION(handler->reset_arch, arch); } struct Sighandlers { typedef shared_ptr<Sighandlers> shr_ptr; shr_ptr clone() const { shr_ptr s(new Sighandlers()); // NB: depends on the fact that Sighandler is for all // intents and purposes a POD type, though not // technically.
for (size_t i = 0; i < array_length(handlers); ++i) { s->handlers[i] = handlers[i]; } return s; } Sighandler& get(int sig) { assert_valid(sig); return handlers[sig]; } const Sighandler& get(int sig) const { assert_valid(sig); return handlers[sig]; } void init_from_current_process() { for (size_t i = 0; i < array_length(handlers); ++i) { Sighandler& h = handlers[i]; NativeArch::kernel_sigaction sa; if (::syscall(SYS_rt_sigaction, i, nullptr, &sa, sizeof(sigset_t))) { /* EINVAL means we're querying an * unused signal number. */ assert(EINVAL == errno); assert(h.is_default()); continue; } h.init_arch(sa); } } /** * For each signal in |table| such that is_user_handler() is * true, reset the disposition of that signal to SIG_DFL, and * clear the resethand flag if it's set. SIG_IGN signals are * not modified. * * (After an exec() call copies the original sighandler table, * this is the operation required by POSIX to initialize that * table copy.) */ void reset_user_handlers(SupportedArch arch) { for (int i = 0; i < ssize_t(array_length(handlers)); ++i) { Sighandler& h = handlers[i]; // If the handler was a user handler, reset to // default. If it was SIG_IGN or SIG_DFL, // leave it alone. if (h.is_user_handler()) { reset_handler(&h, arch); } } } void assert_valid(int sig) const { assert(0 < sig && sig < ssize_t(array_length(handlers))); } static shr_ptr create() { return shr_ptr(new Sighandlers()); } Sighandler handlers[_NSIG]; private: Sighandlers() {} Sighandlers(const Sighandlers&); Sighandlers operator=(const Sighandlers&); }; void TaskGroup::destabilize() { LOG(debug) << "destabilizing task group " << tgid; for (auto it = task_set().begin(); it != task_set().end(); ++it) { Task* t = *it; t->unstable = true; LOG(debug) << " destabilized task " << t->tid; } } TaskGroup::TaskGroup(Session* session, TaskGroup* parent, pid_t tgid, pid_t real_tgid, uint32_t serial) : tgid(tgid), real_tgid(real_tgid), exit_code(-1), dumpable(true), session_(session), parent_(parent), serial(serial) { LOG(debug) << "creating new task group " << tgid << " (real tgid:" << real_tgid << ")"; if (parent) { parent->children.insert(this); } session->on_create(this); } TaskGroup::~TaskGroup() { if (session_) { session_->on_destroy(this); } for (TaskGroup* tg : children) { tg->parent_ = nullptr; } if (parent_) { parent_->children.erase(this); } } Task::Task(Session& session, pid_t _tid, pid_t _rec_tid, uint32_t serial, int _priority, SupportedArch a) : pseudo_blocked(false), succ_event_counter(), unstable(false), stable_exit(false), priority(_priority), in_round_robin_queue(false), emulated_stop_type(NOT_STOPPED), emulated_ptracer(nullptr), emulated_ptrace_stop_code(0), in_wait_type(WAIT_TYPE_NONE), scratch_ptr(), scratch_size(), flushed_syscallbuf(false), delay_syscallbuf_reset(false), // This will be initialized when the syscall buffer is. desched_fd_child(-1), seccomp_bpf_enabled(false), prctl_seccomp_status(0), hpc(_tid), tid(_tid), rec_tid(_rec_tid > 0 ? 
_rec_tid : _tid), own_namespace_rec_tid(0), syscallbuf_hdr(), num_syscallbuf_bytes(), stopping_breakpoint_table_entry_size(0), serial(serial), blocked_sigs(), prname("???"), ticks(0), registers(a), is_stopped(false), extra_registers(a), extra_registers_known(false), robust_futex_list(), robust_futex_list_len(), session_(&session), tid_futex(), top_of_stack(), wait_status(), seen_ptrace_exit_event(false) { push_event(Event(EV_SENTINEL, NO_EXEC_INFO, RR_NATIVE_ARCH)); } Task::~Task() { LOG(debug) << "task " << tid << " (rec:" << rec_tid << ") is dying ..."; if (emulated_ptracer) { emulated_ptracer->emulated_ptrace_tracees.erase(this); } for (Task* t : emulated_ptrace_tracees) { // XXX emulate PTRACE_O_EXITKILL ASSERT(this, t->emulated_ptracer == this); t->emulated_ptracer = nullptr; t->emulated_stop_type = NOT_STOPPED; } // We expect tasks to usually exit by a call to exit() or // exit_group(), so it's not helpful to warn about that. if (EV_SENTINEL != ev().type() && (pending_events.size() > 2 || !(ev().type() == EV_SYSCALL && (is_exit_syscall(ev().Syscall().number, ev().Syscall().regs.arch()) || is_exit_group_syscall(ev().Syscall().number, ev().Syscall().regs.arch()))))) { LOG(warn) << tid << " still has pending events. From top down:"; log_pending_events(); } session().on_destroy(this); tg->erase_task(this); as->erase_task(this); fds->erase_task(this); destroy_local_buffers(); // child_mem_fd needs to be valid since we won't be able to open // it for futex_wait below after we've detached. ASSERT(this, as->mem_fd().is_open()); fallible_ptrace(PTRACE_DETACH, nullptr, nullptr); if (unstable) { // In addition to problems described in the long // comment at the prototype of this function, unstable // exits may result in the kernel *not* clearing the // futex, for example for fatal signals. So we would // deadlock waiting on the futex. LOG(warn) << tid << " is unstable; not blocking on its termination"; // This will probably leak a zombie process for rr's lifetime. return; } ASSERT(this, seen_ptrace_exit_event); if (tg->task_set().empty() && !session().is_recording()) { // Reap the zombie. int ret = waitpid(tg->real_tgid, NULL, __WALL); if (ret == -1) { ASSERT(this, errno == ECHILD || errno == ESRCH); } else { ASSERT(this, ret == tg->real_tgid); } } if (!tid_futex.is_null() && as->task_set().size() > 0) { // clone()'d tasks can have a pid_t* |ctid| argument // that's written with the new task's pid. That // pointer can also be used as a futex: when the task // dies, the original ctid value is cleared and a // FUTEX_WAKE is done on the address. So // pthread_join() is basically a standard futex wait // loop. LOG(debug) << " waiting for tid futex " << tid_futex << " to be cleared ..."; futex_wait(tid_futex, 0); } else if (!tid_futex.is_null()) { // There are no other live tasks in this address // space, which means the address space just died // along with our exit. So we can't read the futex. LOG(debug) << " (can't futex_wait last task in vm)"; } LOG(debug) << " dead"; } bool Task::at_may_restart_syscall() const { ssize_t depth = pending_events.size(); const Event* prev_ev = depth > 2 ? 
&pending_events[depth - 2] : nullptr; return EV_SYSCALL_INTERRUPTION == ev().type() || (EV_SIGNAL_DELIVERY == ev().type() && prev_ev && EV_SYSCALL_INTERRUPTION == prev_ev->type()); } void Task::finish_emulated_syscall() { // XXX verify that this can't be interrupted by a breakpoint trap Registers r = regs(); remote_code_ptr ip = r.ip(); bool known_idempotent_insn_after_syscall = (is_in_traced_syscall() || is_in_untraced_syscall()); // We're about to single-step the tracee at its $ip just past // the syscall insn, then back up the $ip to where it started. // This is problematic because it will execute the insn at the // current $ip twice. If that insn isn't idempotent, then // replay will create side effects that diverge from // recording. // // To prevent that, we insert a breakpoint trap at the current // $ip. We can execute that without creating side effects. // After the single-step, we remove the breakpoint, which // restores the original insn at the $ip. // // Syscalls made from the syscallbuf are known to execute an // idempotent insn after the syscall trap (restore register // from stack), so we don't have to pay this expense. if (!known_idempotent_insn_after_syscall) { bool ok = vm()->add_breakpoint(ip, TRAP_BKPT_INTERNAL); ASSERT(this, ok) << "Can't add breakpoint???"; } // Passing RESUME_NO_TICKS here is not only a small performance optimization, // but also avoids counting an event if the instruction immediately following // a syscall instruction is a conditional branch. resume_execution(RESUME_SYSEMU_SINGLESTEP, RESUME_WAIT, RESUME_NO_TICKS); if (!known_idempotent_insn_after_syscall) { // The breakpoint should raise SIGTRAP, but we can also see // any of the host of replay-ignored signals. ASSERT(this, (pending_sig() == SIGTRAP || ReplaySession::is_ignored_signal(pending_sig()))) << "PENDING SIG IS " << signal_name(pending_sig()); vm()->remove_breakpoint(ip, TRAP_BKPT_INTERNAL); } set_regs(r); wait_status = 0; } const struct syscallbuf_record* Task::desched_rec() const { return (ev().is_syscall_event() ? ev().Syscall().desched_rec : (EV_DESCHED == ev().type()) ? ev().Desched().rec : nullptr); } bool Task::running_inside_desched() const { for (auto& e : pending_events) { if (e.type() == EV_DESCHED) { return e.Desched().rec != desched_rec(); } } return false; } void Task::destabilize_task_group() { tg->destabilize(); } void Task::set_emulated_ptracer(Task* tracer) { if (tracer) { ASSERT(this, !emulated_ptracer); emulated_ptracer = tracer; emulated_ptracer->emulated_ptrace_tracees.insert(this); } else { ASSERT(this, emulated_ptracer); ASSERT(this, emulated_stop_type == NOT_STOPPED || emulated_stop_type == GROUP_STOP); emulated_ptracer->emulated_ptrace_tracees.erase(this); emulated_ptracer = nullptr; } } bool Task::is_waiting_for_ptrace(Task* t) { // This task's process must be a ptracer of t. if (!t->emulated_ptracer || t->emulated_ptracer->tg != tg) { return false; } switch (in_wait_type) { case WAIT_TYPE_NONE: return false; case WAIT_TYPE_ANY: return true; case WAIT_TYPE_SAME_PGID: return getpgid(t->tgid()) == getpgid(tgid()); case WAIT_TYPE_PGID: return getpgid(t->tgid()) == in_wait_pid; case WAIT_TYPE_PID: // When waiting for a ptracee, a specific pid is interpreted as the // exact tid. return t->tid == in_wait_pid; default: ASSERT(this, false); return false; } } bool Task::is_waiting_for(Task* t) { // t must be a child of this task.
if (t->tg->parent() != tg.get()) { return false; } switch (in_wait_type) { case WAIT_TYPE_NONE: return false; case WAIT_TYPE_ANY: return true; case WAIT_TYPE_SAME_PGID: return getpgid(t->tgid()) == getpgid(tgid()); case WAIT_TYPE_PGID: return getpgid(t->tgid()) == in_wait_pid; case WAIT_TYPE_PID: return t->tgid() == in_wait_pid; default: ASSERT(this, false); return false; } } bool Task::emulate_ptrace_stop(int code, EmulatedStopType stop_type) { ASSERT(this, emulated_stop_type == NOT_STOPPED); ASSERT(this, stop_type != NOT_STOPPED); if (!emulated_ptracer) { return false; } force_emulate_ptrace_stop(code, stop_type); return true; } void Task::force_emulate_ptrace_stop(int code, EmulatedStopType stop_type) { emulated_stop_type = stop_type; emulated_ptrace_stop_code = code; emulated_ptrace_SIGCHLD_pending = true; emulated_ptracer->send_synthetic_SIGCHLD_if_necessary(); // The SIGCHLD will eventually be reported to rr via a ptrace stop, // interrupting wake_task's syscall (probably a waitpid) if necessary. At // that point, we'll fix up the siginfo data with values that match what // the kernel would have delivered for a real ptracer's SIGCHLD. When the // signal handler (if any) returns, if wake_task was in a blocking wait that // wait will be resumed, at which point rec_prepare_syscall_arch will // discover the pending ptrace result and emulate the wait syscall to // return that result immediately. } void Task::send_synthetic_SIGCHLD_if_necessary() { Task* wake_task = nullptr; bool need_signal = false; for (Task* tracee : emulated_ptrace_tracees) { if (tracee->emulated_ptrace_SIGCHLD_pending) { need_signal = true; // check to see if any thread in the ptracer process is in a waitpid that // could read the status of 'tracee'. If it is, we should wake up that // thread. Otherwise we send SIGCHLD to the ptracer thread. for (Task* t : task_group()->task_set()) { if (t->is_waiting_for_ptrace(tracee)) { wake_task = t; break; } } if (wake_task) { break; } } } if (!need_signal) { return; } // ptrace events trigger SIGCHLD in the ptracer's wake_task. // We can't set all the siginfo values to their correct values here, so // we'll patch this up when the signal is received. // If there's already a pending SIGCHLD, this signal will be ignored, // but at some point the pending SIGCHLD will be delivered and then // send_synthetic_SIGCHLD_if_necessary will be called again to deliver a new // SIGCHLD if necessary. siginfo_t si; memset(&si, 0, sizeof(si)); si.si_code = SI_QUEUE; si.si_value.sival_int = SIGCHLD_SYNTHETIC; int ret; if (wake_task) { ASSERT(wake_task, !wake_task->is_sig_blocked(SIGCHLD)) << "Waiting task has SIGCHLD blocked so we have no way to wake it up " ":-("; // We must use the raw SYS_rt_tgsigqueueinfo syscall here to ensure the // signal is sent to the correct thread by tid. ret = syscall(SYS_rt_tgsigqueueinfo, wake_task->tgid(), wake_task->tid, SIGCHLD, &si); LOG(debug) << "Sending synthetic SIGCHLD to tid " << wake_task->tid; } else { // Send the signal to the process as a whole and let the kernel // decide which thread gets it. 
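  // (Aside: a minimal sketch of the two queueing syscalls chosen between
  // here, using only the glibc syscall(2) wrapper; the bare argument names
  // are illustrative:
  //
  //   siginfo_t si = {};          // si_code = SI_QUEUE, sival_int = magic
  //   syscall(SYS_rt_tgsigqueueinfo, tgid, tid, SIGCHLD, &si); // one thread
  //   syscall(SYS_rt_sigqueueinfo, tgid, SIGCHLD, &si);        // any thread
  //
  // Queueing with SI_QUEUE plus the SIGCHLD_SYNTHETIC sival_int is what
  // lets set_siginfo_for_synthetic_SIGCHLD recognize these signals later.)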
ret = syscall(SYS_rt_sigqueueinfo, tgid(), SIGCHLD, &si); LOG(debug) << "Sending synthetic SIGCHLD to pid " << tgid(); } ASSERT(this, ret == 0); } void Task::set_siginfo_for_synthetic_SIGCHLD(siginfo_t* si) { if (si->si_signo != SIGCHLD || si->si_value.sival_int != SIGCHLD_SYNTHETIC) { return; } for (Task* tracee : emulated_ptrace_tracees) { if (tracee->emulated_ptrace_SIGCHLD_pending) { tracee->emulated_ptrace_SIGCHLD_pending = false; si->si_code = CLD_TRAPPED; si->si_pid = tracee->tgid(); si->si_uid = tracee->getuid(); si->si_status = WSTOPSIG(tracee->emulated_ptrace_stop_code); si->si_value.sival_int = 0; return; } } } void Task::dump(FILE* out) const { out = out ? out : stderr; fprintf(out, " %s(tid:%d rec_tid:%d status:0x%x%s)<%p>\n", prname.c_str(), tid, rec_tid, wait_status, unstable ? " UNSTABLE" : "", this); if (session().is_recording()) { // TODO pending events are currently only meaningful // during recording. We should change that // eventually, to have more informative output. log_pending_events(); } } struct stat Task::stat_fd(int fd) { char path[PATH_MAX]; snprintf(path, sizeof(path) - 1, "/proc/%d/fd/%d", tid, fd); struct stat result; auto ret = ::stat(path, &result); ASSERT(this, ret == 0); return result; } ScopedFd Task::open_fd(int fd, int flags) { char path[PATH_MAX]; snprintf(path, sizeof(path) - 1, "/proc/%d/fd/%d", tid, fd); return ScopedFd(path, flags); } string Task::file_name_of_fd(int fd) { char path[PATH_MAX]; snprintf(path, sizeof(path) - 1, "/proc/%d/fd/%d", tid, fd); ssize_t nbytes = readlink(path, path, sizeof(path) - 1); ASSERT(this, nbytes >= 0); path[nbytes] = '\0'; return path; } void Task::futex_wait(remote_ptr futex, int val) { // Wait for *sync_addr == sync_val. This implementation isn't // pretty, but it's pretty much the best we can do with // available kernel tools. // // TODO: find clever way to avoid busy-waiting. while (true) { bool ok = true; int mem = read_mem(futex, &ok); if (!ok || val == mem) { // Invalid addresses are just ignored by the kernel break; } // Try to give our scheduling slot to the kernel // thread that's going to write sync_addr. sched_yield(); } } pid_t Task::get_ptrace_eventmsg_pid() { unsigned long msg = 0; // in theory we could hit an assertion failure if the tracee suffers // a SIGKILL before we get here. But the SIGKILL would have to be // precisely timed between the generation of a PTRACE_EVENT_FORK/CLONE/ // SYS_clone event, and us fetching the event message here. xptrace(PTRACE_GETEVENTMSG, nullptr, &msg); return (pid_t)msg; } uint16_t Task::get_ptrace_eventmsg_seccomp_data() { unsigned long data = 0; // in theory we could hit an assertion failure if the tracee suffers // a SIGKILL before we get here. But the SIGKILL would have to be // precisely timed between the generation of a PTRACE_EVENT_FORK/CLONE/ // SYS_clone event, and us fetching the event message here. 
xptrace(PTRACE_GETEVENTMSG, nullptr, &data);
  return data;
}

const siginfo_t& Task::get_siginfo() {
  assert(pending_sig());
  return pending_siginfo;
}

void Task::set_siginfo(const siginfo_t& si) {
  pending_siginfo = si;
  ptrace_if_alive(PTRACE_SETSIGINFO, nullptr, (void*)&si);
}

TraceReader& Task::trace_reader() { return replay_session().trace_reader(); }

TraceWriter& Task::trace_writer() { return record_session().trace_writer(); }

RecordSession& Task::record_session() const { return *session().as_record(); }

ReplaySession& Task::replay_session() const { return *session().as_replay(); }

template <typename Arch>
void Task::init_buffers_arch(remote_ptr<void> map_hint) {
  // NB: the tracee can't be interrupted with a signal while
  // we're processing the rrcall, because it's masked off all
  // signals.
  AutoRemoteSyscalls remote(this);

  // Arguments to the rrcall.
  remote_ptr<rrcall_init_buffers_params<Arch> > child_args =
      remote.regs().arg1();
  auto args = read_mem(child_args);

  if (as->syscallbuf_enabled()) {
    init_syscall_buffer(remote, map_hint);
    args.syscallbuf_ptr = syscallbuf_child;
    desched_fd_child = args.desched_counter_fd;
    // Prevent the child from closing this fd
    fds->add_monitor(desched_fd_child, new PreserveFileMonitor());
    if (session().is_recording()) {
      desched_fd = remote.retrieve_fd(desched_fd_child);
    }
  } else {
    args.syscallbuf_ptr = remote_ptr<void>(nullptr);
  }

  // Return the mapped buffers to the child.
  write_mem(child_args, args);

  // The tracee doesn't need this addr returned, because it's
  // already written to the inout |args| param, but we stash it
  // away in the return value slot so that we can easily check
  // that we map the segment at the same addr during replay.
  remote.regs().set_syscall_result(syscallbuf_child);
}

void Task::init_buffers(remote_ptr<void> map_hint) {
  RR_ARCH_FUNCTION(init_buffers_arch, arch(), map_hint);
}

void Task::destroy_buffers() {
  AutoRemoteSyscalls remote(this);
  remote.infallible_syscall(syscall_number_for_munmap(arch()), scratch_ptr,
                            scratch_size);
  vm()->unmap(scratch_ptr, scratch_size);
  if (!syscallbuf_child.is_null()) {
    remote.infallible_syscall(syscall_number_for_munmap(arch()),
                              syscallbuf_child, num_syscallbuf_bytes);
    vm()->unmap(syscallbuf_child, num_syscallbuf_bytes);
    if (desched_fd_child >= 0) {
      if (session().is_recording()) {
        remote.infallible_syscall(syscall_number_for_close(arch()),
                                  desched_fd_child);
      }
      fds->did_close(desched_fd_child);
    }
  }
}

bool Task::is_arm_desched_event_syscall() {
  return is_desched_event_syscall() && PERF_EVENT_IOC_ENABLE == regs().arg2();
}

bool Task::is_desched_event_syscall() {
  return is_ioctl_syscall(regs().original_syscallno(), arch()) &&
         desched_fd_child == (int)regs().arg1_signed();
}

bool Task::is_disarm_desched_event_syscall() {
  return (is_desched_event_syscall() &&
          PERF_EVENT_IOC_DISABLE == regs().arg2());
}

bool Task::is_ptrace_seccomp_event() const {
  int event = ptrace_event();
  return (PTRACE_EVENT_SECCOMP_OBSOLETE == event ||
          PTRACE_EVENT_SECCOMP == event);
}

bool Task::is_sig_blocked(int sig) const {
  int sig_bit = sig - 1;
  if (sigsuspend_blocked_sigs) {
    return (*sigsuspend_blocked_sigs >> sig_bit) & 1;
  }
  return (blocked_sigs >> sig_bit) & 1;
}

void Task::set_sig_blocked(int sig) {
  int sig_bit = sig - 1;
  blocked_sigs |= (sig_set_t)1 << sig_bit;
}

bool Task::is_sig_ignored(int sig) const {
  return sighandlers->get(sig).ignored(sig);
}

bool Task::is_syscall_restart() {
  int syscallno = regs().original_syscallno();
  bool is_restart = false;

  LOG(debug) << " is syscall interruption of recorded " << ev() << "? (now "
             << syscall_name(syscallno) << ")";

  if (EV_SYSCALL_INTERRUPTION != ev().type()) {
    goto done;
  }

  /* It's possible for the tracee to resume after a sighandler
   * with a fresh syscall that happens to be the same as the one
   * that was interrupted.  So we check here if the args are the
   * same.
   *
   * Of course, it's possible (but less likely) for the tracee
   * to incidentally resume with a fresh syscall that just
   * happens to have the same *arguments* too.  But in that
   * case, we would usually set up scratch buffers etc the same
   * way as for the original interrupted syscall, so we just
   * save a step here.
   *
   * TODO: it's possible for arg structures to be mutated
   * between the original call and restarted call in such a way
   * that it might change the scratch allocation decisions. */
  if (is_restart_syscall_syscall(syscallno, arch())) {
    is_restart = true;
    syscallno = ev().Syscall().number;
    LOG(debug) << " (SYS_restart_syscall)";
  }
  if (ev().Syscall().number != syscallno) {
    LOG(debug) << " interrupted " << ev() << " != "
               << syscall_name(syscallno);
    goto done;
  }

  {
    const Registers& old_regs = ev().Syscall().regs;
    if (!(old_regs.arg1() == regs().arg1() &&
          old_regs.arg2() == regs().arg2() &&
          old_regs.arg3() == regs().arg3() &&
          old_regs.arg4() == regs().arg4() &&
          old_regs.arg5() == regs().arg5() &&
          old_regs.arg6() == regs().arg6())) {
      LOG(debug) << " regs different at interrupted "
                 << syscall_name(syscallno);
      goto done;
    }
  }

  is_restart = true;

done:
  if (is_restart) {
    LOG(debug) << " restart of " << syscall_name(syscallno);
  }
  return is_restart;
}

void Task::log_pending_events() const {
  ssize_t depth = pending_events.size();

  assert(depth > 0);
  if (1 == depth) {
    LOG(info) << "(no pending events)";
    return;
  }

  /* The event at depth 0 is the placeholder event, which isn't
   * useful to log.  Skip it. */
  for (auto it = pending_events.rbegin(); it != pending_events.rend() - 1;
       ++it) {
    it->log();
  }
}

bool Task::may_be_blocked() const {
  return (EV_SYSCALL == ev().type() &&
          PROCESSING_SYSCALL == ev().Syscall().state) ||
         emulated_stop_type != NOT_STOPPED;
}

template <typename Arch>
void Task::on_syscall_exit_arch(int syscallno, const Registers& regs) {
  session().accumulate_syscall_performed();

  // mprotect can change the protection status of some mapped regions before
  // failing.
if (regs.syscall_failed() && !is_mprotect_syscall(syscallno, arch())) { return; } switch (syscallno) { case Arch::brk: case Arch::mmap: case Arch::mmap2: { LOG(debug) << "(brk/mmap/mmap2 will receive / has received direct processing)"; return; } case Arch::mprotect: { remote_ptr addr = regs.arg1(); size_t num_bytes = regs.arg2(); int prot = regs.arg3_signed(); return vm()->protect(addr, num_bytes, prot); } case Arch::mremap: { remote_ptr old_addr = regs.arg1(); size_t old_num_bytes = regs.arg2(); remote_ptr new_addr = regs.syscall_result(); size_t new_num_bytes = regs.arg3(); return vm()->remap(old_addr, old_num_bytes, new_addr, new_num_bytes); } case Arch::munmap: { remote_ptr addr = regs.arg1(); size_t num_bytes = regs.arg2(); return vm()->unmap(addr, num_bytes); } case Arch::shmdt: { remote_ptr addr = regs.arg1(); auto mapping = vm()->mapping_of(addr); ASSERT(this, mapping.map.start() == addr); return vm()->unmap(addr, mapping.map.end() - addr); } case Arch::madvise: { remote_ptr addr = regs.arg1(); size_t num_bytes = regs.arg2(); int advice = regs.arg3(); return vm()->advise(addr, num_bytes, advice); } case Arch::ipc: { switch ((int)regs.arg1_signed()) { case SHMDT: { remote_ptr addr = regs.arg5(); auto mapping = vm()->mapping_of(addr); ASSERT(this, mapping.map.start() == addr); return vm()->unmap(addr, mapping.map.end() - addr); } default: break; } break; } case Arch::set_robust_list: set_robust_list(regs.arg1(), (size_t)regs.arg2()); return; case Arch::set_thread_area: set_thread_area(regs.arg1()); return; case Arch::set_tid_address: set_tid_addr(regs.arg1()); return; case Arch::sigaction: case Arch::rt_sigaction: // TODO: SYS_signal update_sigaction(regs); return; case Arch::sigprocmask: case Arch::rt_sigprocmask: update_sigmask(regs); return; case Arch::dup: case Arch::dup2: case Arch::dup3: fd_table()->did_dup(regs.arg1(), regs.syscall_result()); return; case Arch::fcntl64: case Arch::fcntl: if (regs.arg2() == Arch::DUPFD || regs.arg2() == Arch::DUPFD_CLOEXEC) { fd_table()->did_dup(regs.arg1(), regs.syscall_result()); } return; case Arch::close: fd_table()->did_close(regs.arg1()); return; case Arch::unshare: if (regs.arg1() & CLONE_FILES) { fds->erase_task(this); fds = fds->clone(this); } return; case Arch::write: { int fd = (int)regs.arg1_signed(); vector ranges; ssize_t amount = regs.syscall_result_signed(); if (amount > 0) { ranges.push_back(FileMonitor::Range(regs.arg2(), amount)); } fd_table()->did_write(this, fd, ranges); return; } case Arch::writev: { int fd = (int)regs.arg1_signed(); vector ranges; auto iovecs = read_mem(remote_ptr(regs.arg2()), regs.arg3()); ssize_t written = regs.syscall_result_signed(); ASSERT(this, written >= 0); for (auto& v : iovecs) { ssize_t amount = min(written, v.iov_len); if (amount > 0) { ranges.push_back(FileMonitor::Range(v.iov_base, amount)); written -= amount; } } fd_table()->did_write(this, fd, ranges); return; } } } void Task::on_syscall_exit(int syscallno, const Registers& regs) { RR_ARCH_FUNCTION(on_syscall_exit_arch, arch(), syscallno, regs) } void Task::move_ip_before_breakpoint() { // TODO: assert that this is at a breakpoint trap. 
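  // (Aside: on x86/x86-64 a software breakpoint is the single-byte 0xCC
  // "int3" instruction, and the resulting SIGTRAP reports an $ip just past
  // that byte.  A sketch of the adjustment performed below, assuming a
  // one-byte breakpoint instruction:
  //
  //   uintptr_t trap_ip = regs().ip().register_value();
  //   uintptr_t bkpt_ip = trap_ip - 1;  // start of the 0xCC byte
  // )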
Registers r = regs(); r.set_ip(r.ip().decrement_by_bkpt_insn_length(arch())); set_regs(r); } void Task::advance_syscall() { while (true) { resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS); if (is_ptrace_seccomp_event()) { continue; } ASSERT(this, ptrace_event() == 0); if (!pending_sig()) { break; } if (ReplaySession::is_ignored_signal(pending_sig()) && session().is_replaying()) { continue; } ASSERT(this, session().is_recording()); stash_sig(); } } void Task::exit_syscall_and_prepare_restart() { Registers r = regs(); int syscallno = r.original_syscallno(); r.set_original_syscallno(syscall_number_for_gettid(r.arch())); set_regs(r); // This exits the hijacked SYS_gettid. Now the tracee is // ready to do our bidding. advance_syscall(); // Restore these regs to what they would have been just before // the tracee trapped at the syscall. r.set_original_syscallno(-1); r.set_syscallno(syscallno); r.set_ip(r.ip() - syscall_instruction_length(r.arch())); set_regs(r); } static string prname_from_exe_image(const string& e) { size_t last_slash = e.rfind('/'); return e.substr(last_slash == e.npos ? 0 : last_slash + 1); } static SupportedArch determine_arch(Task* t, const string& file_name) { ASSERT(t, file_name.size() > 0); switch (read_elf_class(file_name)) { case ELFCLASS32: return x86; case ELFCLASS64: ASSERT(t, NativeArch::arch() == x86_64) << "64-bit tracees not supported"; return x86_64; case NOT_ELF: // Probably a script. Optimistically assume the same architecture as // the rr binary. return NativeArch::arch(); default: ASSERT(t, false) << "Unknown ELF class"; return x86; } } static string exe_path(Task* t) { char proc_exe[PATH_MAX]; snprintf(proc_exe, sizeof(proc_exe), "/proc/%d/exe", t->tid); char exe[PATH_MAX]; ssize_t ret = readlink(proc_exe, exe, sizeof(exe) - 1); ASSERT(t, ret >= 0); exe[ret] = 0; return exe; } void Task::post_exec(const Registers* replay_regs, const ExtraRegisters* replay_extra_regs, const string* replay_exe) { /* We just saw a successful exec(), so from now on we know * that the address space layout for the replay tasks will * (should!) be the same as for the recorded tasks. So we can * start validating registers at events. */ session().post_exec(); as->erase_task(this); fds->erase_task(this); string exe_file = replay_exe ? *replay_exe : exe_path(this); registers.set_arch(determine_arch(this, exe_file)); extra_registers.set_arch(registers.arch()); // Read registers now that the architecture is known. struct user_regs_struct ptrace_regs; ptrace_if_alive(PTRACE_GETREGS, nullptr, &ptrace_regs); registers.set_from_ptrace(ptrace_regs); // Change syscall number to execve *for the new arch*. If we don't do this, // and the arch changes, then the syscall number for execve in the old arch/ // is treated as the syscall we're executing in the new arch, with hilarious // results. registers.set_original_syscallno(syscall_number_for_execve(arch())); set_regs(registers); if (!replay_regs) { ev().set_arch(arch()); ev().Syscall().number = registers.original_syscallno(); } // Clear robust_list state to match kernel state. If this task is cloned // soon after exec, we must not do a bogus set_robust_list syscall for // the clone. 
set_robust_list(nullptr, 0); syscallbuf_child = nullptr; syscallbuf_fds_disabled_child = nullptr; sighandlers = sighandlers->clone(); sighandlers->reset_user_handlers(arch()); thread_areas_.clear(); as = session().create_vm(this, exe_file, as->uid().exec_count() + 1); // It's barely-documented, but Linux unshares the fd table on exec fds = fds->clone(this); prname = prname_from_exe_image(as->exe_image()); if (replay_regs) { // Delay setting the replay_regs until here so the original registers // are set while we populate AddressSpace. We need that for the kernel // to identify the original stack region correctly. registers = *replay_regs; extra_registers = *replay_extra_regs; ASSERT(this, !extra_registers.empty()); set_regs(registers); } } void Task::post_exec_syscall(TraceTaskEvent& event) { as->post_exec_syscall(this); fds->update_for_cloexec(this, event); } void Task::record_current_event() { record_event(ev()); } void Task::pop_event(EventType expected_type) { ASSERT(this, pending_events.back().type() == expected_type); pending_events.pop_back(); } static bool record_extra_regs(const Event& ev) { switch (ev.type()) { case EV_SYSCALL: // sigreturn/rt_sigreturn restores register state return ev.Syscall().state == EXITING_SYSCALL && (is_sigreturn(ev.Syscall().number, ev.arch()) || is_execve_syscall(ev.Syscall().number, ev.arch())); case EV_SIGNAL_HANDLER: // entering a signal handler seems to clear FP/SSE regs, // so record these effects. return true; default: return false; } } /** * If the syscallbuf has just been flushed, and resetting hasn't been * overridden with a delay request, then record the reset event for * replay. */ void Task::maybe_reset_syscallbuf() { if (flushed_syscallbuf && !delay_syscallbuf_reset) { flushed_syscallbuf = false; LOG(debug) << "Syscallbuf reset"; reset_syscallbuf(); record_event(Event(EV_SYSCALLBUF_RESET, NO_EXEC_INFO, arch())); } } void Task::record_event(const Event& ev, FlushSyscallbuf flush) { if (flush == FLUSH_SYSCALLBUF) { maybe_flush_syscallbuf(); } TraceFrame frame(trace_writer().time(), tid, ev, tick_count()); if (ev.record_exec_info() == HAS_EXEC_INFO) { PerfCounters::Extra extra_perf_values; if (PerfCounters::extra_perf_counters_enabled()) { extra_perf_values = hpc.read_extra(); } frame.set_exec_info(regs(), PerfCounters::extra_perf_counters_enabled() ? &extra_perf_values : nullptr, record_extra_regs(ev) ? &extra_regs() : nullptr); } if (should_dump_memory(this, frame)) { dump_process_memory(this, frame.time(), "rec"); } if (should_checksum(this, frame)) { checksum_process_memory(this, frame.time()); } trace_writer().write_frame(frame); if (!ev.has_ticks_slop()) { ASSERT(this, flush == FLUSH_SYSCALLBUF); // After we've output an event, it's safe to reset the syscallbuf (if not // explicitly delayed) since we will have exited the syscallbuf code that // consumed the syscallbuf data. // This only works if the event has a reliable tick count so when we // reach it, we're done. maybe_reset_syscallbuf(); } } void Task::flush_inconsistent_state() { ticks = 0; } void Task::set_tick_count(Ticks count) { ticks = count; } void Task::record_local(remote_ptr addr, ssize_t num_bytes, const void* data) { maybe_flush_syscallbuf(); ASSERT(this, num_bytes >= 0); if (!addr) { return; } trace_writer().write_raw(data, num_bytes, addr); } void Task::record_remote(remote_ptr addr, ssize_t num_bytes) { maybe_flush_syscallbuf(); // We shouldn't be recording a scratch address. 
ASSERT(this, !addr || addr != scratch_ptr); assert(num_bytes >= 0); if (!addr) { return; } auto buf = read_mem(addr.cast(), num_bytes); trace_writer().write_raw(buf.data(), num_bytes, addr); } void Task::record_remote_fallible(remote_ptr addr, ssize_t num_bytes) { maybe_flush_syscallbuf(); // We shouldn't be recording a scratch address. ASSERT(this, !addr || addr != scratch_ptr); ASSERT(this, num_bytes >= 0); vector buf; if (!addr.is_null()) { buf.resize(num_bytes); ssize_t nread = read_bytes_fallible(addr, num_bytes, buf.data()); buf.resize(max(0, nread)); } trace_writer().write_raw(buf.data(), buf.size(), addr); } void Task::record_remote_even_if_null(remote_ptr addr, ssize_t num_bytes) { maybe_flush_syscallbuf(); // We shouldn't be recording a scratch address. ASSERT(this, !addr || addr != scratch_ptr); assert(num_bytes >= 0); if (!addr) { trace_writer().write_raw(nullptr, 0, addr); return; } auto buf = read_mem(addr.cast(), num_bytes); trace_writer().write_raw(buf.data(), num_bytes, addr); } void Task::record_remote_str(remote_ptr str) { maybe_flush_syscallbuf(); if (!str) { return; } string s = read_c_str(str); // Record the \0 byte. trace_writer().write_raw(s.c_str(), s.size() + 1, str); } string Task::read_c_str(remote_ptr child_addr) { // XXX handle invalid C strings string str; while (true) { // We're only guaranteed that [child_addr, // end_of_page) is mapped. remote_ptr end_of_page = ceil_page_size(child_addr + 1); ssize_t nbytes = end_of_page - child_addr; char buf[nbytes]; read_bytes_helper(child_addr, nbytes, buf); for (int i = 0; i < nbytes; ++i) { if ('\0' == buf[i]) { return str; } str += buf[i]; } child_addr = end_of_page; } } const Registers& Task::regs() const { ASSERT(this, is_stopped); return registers; } // 0 means XSAVE not detected static unsigned int xsave_area_size = 0; static bool xsave_initialized = false; static void init_xsave() { if (xsave_initialized) { return; } xsave_initialized = true; unsigned int eax, ecx, edx; cpuid(CPUID_GETFEATURES, 0, &eax, &ecx, &edx); if (!(ecx & (1 << 26))) { // XSAVE not present return; } // We'll use the largest possible area all the time // even when it might not be needed. Simpler that way. cpuid(CPUID_GETXSAVE, 0, &eax, &ecx, &edx); xsave_area_size = ecx; } const ExtraRegisters& Task::extra_regs() { if (!extra_registers_known) { init_xsave(); if (xsave_area_size) { LOG(debug) << " (refreshing extra-register cache using XSAVE)"; extra_registers.format_ = ExtraRegisters::XSAVE; extra_registers.data.resize(xsave_area_size); struct iovec vec = { extra_registers.data.data(), extra_registers.data.size() }; xptrace(PTRACE_GETREGSET, NT_X86_XSTATE, &vec); ASSERT(this, vec.iov_len == xsave_area_size) << "Didn't get enough register data; expected " << xsave_area_size << " but got " << vec.iov_len; } else { #if defined(__i386__) LOG(debug) << " (refreshing extra-register cache using FPXREGS)"; extra_registers.format_ = ExtraRegisters::XSAVE; extra_registers.data.resize(sizeof(user_fpxregs_struct)); xptrace(PTRACE_GETFPXREGS, nullptr, extra_registers.data.data()); #elif defined(__x86_64__) // x86-64 that doesn't support XSAVE; apparently Xeon E5620 (Westmere) // is in this class. 
LOG(debug) << " (refreshing extra-register cache using FPREGS)"; extra_registers.format_ = ExtraRegisters::XSAVE; extra_registers.data.resize(sizeof(user_fpregs_struct)); xptrace(PTRACE_GETFPREGS, nullptr, extra_registers.data.data()); #else #error need to define new extra_regs support #endif } extra_registers_known = true; } return extra_registers; } void Task::validate_regs(uint32_t flags) { /* don't validate anything before execve is done as the actual * process did not start prior to this point */ if (!session().can_validate()) { return; } Registers rec_regs = current_trace_frame().regs(); if (flags & IGNORE_ESI) { if (regs().arg4() != rec_regs.arg4()) { LOG(warn) << "Probably saw kernel bug mutating $esi across pread/write64 " "call: recorded:" << HEX(rec_regs.arg4()) << "; replaying:" << regs().arg4() << ". Fudging registers."; rec_regs.set_arg4(regs().arg4()); } } /* TODO: add perf counter validations (hw int, page faults, insts) */ Registers::compare_register_files(this, "replaying", regs(), "recorded", rec_regs, BAIL_ON_MISMATCH); } template static ReturnAddressList return_addresses_x86ish(Task* t) { ReturnAddressList result; // Immediately after a function call the return address is on the stack at // SP. After BP is pushed, but before it's initialized for the new stack // frame, the return address is on the stack at SP+wordsize. Just // capture those words now. We could inspect the code for known prologs/ // epilogs but that misses cases such as calling into optimized code // or PLT stubs (which start with 'jmp'). Since it doesn't matter if we // capture addresses that aren't real return addresses, just capture those // words unconditionally. typename Arch::size_t frame[2]; int next_address = 0; if (t->read_bytes_fallible(t->regs().sp(), sizeof(frame), frame) == sizeof(frame)) { result.addresses[0] = frame[0]; result.addresses[1] = frame[1]; next_address = 2; } remote_ptr bp = t->regs().bp(); for (int i = next_address; i < ReturnAddressList::COUNT; ++i) { if (t->read_bytes_fallible(bp, sizeof(frame), frame) != sizeof(frame)) { return result; } result.addresses[i] = frame[1]; bp = frame[0]; } return result; } ReturnAddressList Task::return_addresses() { switch (arch()) { case x86: case x86_64: RR_ARCH_FUNCTION(return_addresses_x86ish, arch(), this); default: ASSERT(this, "Unknown architecture"); return ReturnAddressList(); } } static ssize_t dr_user_word_offset(size_t i) { assert(i < NUM_X86_DEBUG_REGS); return offsetof(struct user, u_debugreg[0]) + sizeof(void*) * i; } uintptr_t Task::debug_status() { return fallible_ptrace(PTRACE_PEEKUSER, dr_user_word_offset(6), nullptr); } uintptr_t Task::consume_debug_status() { uintptr_t status = fallible_ptrace(PTRACE_PEEKUSER, dr_user_word_offset(6), nullptr); fallible_ptrace(PTRACE_POKEUSER, dr_user_word_offset(6), 0); return status; } void Task::replace_debug_status(uintptr_t status) { fallible_ptrace(PTRACE_POKEUSER, dr_user_word_offset(6), (void*)status); } remote_ptr Task::watchpoint_addr(size_t i) { assert(i < NUM_X86_WATCHPOINTS); return fallible_ptrace(PTRACE_PEEKUSER, dr_user_word_offset(i), nullptr); } void Task::remote_memcpy(remote_ptr dst, remote_ptr src, size_t num_bytes) { // XXX this could be more efficient uint8_t buf[num_bytes]; read_bytes_helper(src, num_bytes, buf); write_bytes_helper(dst, num_bytes, buf); } void Task::resume_execution(ResumeRequest how, WaitRequest wait_how, TicksRequest tick_period, int sig) { // Treat a 0 tick_period as a very large but finite number. 
// Always resetting here, and always to a nonzero number, improves // consistency between recording and replay and hopefully // makes counting bugs behave similarly between recording and // replay. // Accumulate any unknown stuff in tick_count(). if (tick_period != RESUME_NO_TICKS) { hpc.reset(tick_period == RESUME_UNLIMITED_TICKS ? 0xffffffff : tick_period); } LOG(debug) << "resuming execution with " << ptrace_req_name(how); breakpoint_set_where_execution_resumed = vm()->get_breakpoint_type_at_addr(ip()) != TRAP_NONE; ptrace_if_alive(how, nullptr, (void*)(uintptr_t)sig); is_stopped = false; extra_registers_known = false; if (RESUME_WAIT == wait_how) { wait(); } } const TraceFrame& Task::current_trace_frame() { return replay_session().current_trace_frame(); } ssize_t Task::set_data_from_trace() { auto buf = trace_reader().read_raw_data(); if (!buf.addr.is_null() && buf.data.size() > 0) { write_bytes_helper(buf.addr, buf.data.size(), buf.data.data()); } return buf.data.size(); } void Task::apply_all_data_records_from_trace() { TraceReader::RawData buf; while (trace_reader().read_raw_data_for_frame(current_trace_frame(), buf)) { if (!buf.addr.is_null() && buf.data.size() > 0) { write_bytes_helper(buf.addr, buf.data.size(), buf.data.data()); } } } void Task::set_return_value_from_trace() { Registers r = regs(); r.set_syscall_result(current_trace_frame().regs().syscall_result()); // In some cases (e.g. syscalls forced to return an error by tracee // seccomp filters) we need to emulate a change to the original_syscallno // (to -1 in that case). r.set_original_syscallno(current_trace_frame().regs().original_syscallno()); set_regs(r); } void Task::set_regs(const Registers& regs) { ASSERT(this, is_stopped); registers = regs; auto ptrace_regs = registers.get_ptrace(); ptrace_if_alive(PTRACE_SETREGS, nullptr, &ptrace_regs); } void Task::set_extra_regs(const ExtraRegisters& regs) { ASSERT(this, !regs.empty()) << "Trying to set empty ExtraRegisters"; extra_registers = regs; extra_registers_known = true; init_xsave(); switch (extra_registers.format()) { case ExtraRegisters::XSAVE: { if (xsave_area_size) { struct iovec vec = { extra_registers.data.data(), extra_registers.data.size() }; ptrace_if_alive(PTRACE_SETREGSET, NT_X86_XSTATE, &vec); } else { #if defined(__i386__) ptrace_if_alive(PTRACE_SETFPXREGS, nullptr, extra_registers.data.data()); #elif defined(__x86_64__) ptrace_if_alive(PTRACE_SETFPREGS, nullptr, extra_registers.data.data()); #else #error Unsupported architecture #endif } break; } default: ASSERT(this, false) << "Unexpected ExtraRegisters format"; } } enum WatchBytesX86 { BYTES_1 = 0x00, BYTES_2 = 0x01, BYTES_4 = 0x03, BYTES_8 = 0x02 }; static WatchBytesX86 num_bytes_to_dr_len(size_t num_bytes) { switch (num_bytes) { case 1: return BYTES_1; case 2: return BYTES_2; case 4: return BYTES_4; case 8: return BYTES_8; default: FATAL() << "Unsupported breakpoint size " << num_bytes; return WatchBytesX86(-1); // not reached } } bool Task::set_debug_regs(const DebugRegs& regs) { struct DebugControl { uintptr_t packed() { return *(uintptr_t*)this; } uintptr_t dr0_local : 1; uintptr_t dr0_global : 1; uintptr_t dr1_local : 1; uintptr_t dr1_global : 1; uintptr_t dr2_local : 1; uintptr_t dr2_global : 1; uintptr_t dr3_local : 1; uintptr_t dr3_global : 1; uintptr_t ignored : 8; WatchType dr0_type : 2; WatchBytesX86 dr0_len : 2; WatchType dr1_type : 2; WatchBytesX86 dr1_len : 2; WatchType dr2_type : 2; WatchBytesX86 dr2_len : 2; WatchType dr3_type : 2; WatchBytesX86 dr3_len : 2; } dr7 = { 0 }; 
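  // (Aside: a worked example of the packing above, with illustrative
  // values.  For a single 4-byte write watchpoint in DR0 -- dr0_local = 1,
  // dr0_type = write, dr0_len = BYTES_4 -- the packed word is
  //
  //   0x1 | (0x1 << 16) | (0x3 << 18) == 0xd0001
  //
  // assuming WatchType's "write" value matches the hardware R/W encoding
  // of 0b01.)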
static_assert(sizeof(DebugControl) == sizeof(uintptr_t), "Can't pack DebugControl"); // Reset the debug status since we're about to change the set // of programmed watchpoints. ptrace_if_alive(PTRACE_POKEUSER, dr_user_word_offset(6), 0); // Ensure that we clear the programmed watchpoints in case // enabling one of them fails. We guarantee atomicity to the // caller. ptrace_if_alive(PTRACE_POKEUSER, dr_user_word_offset(7), 0); if (regs.size() > NUM_X86_WATCHPOINTS) { return false; } size_t dr = 0; for (auto reg : regs) { if (fallible_ptrace(PTRACE_POKEUSER, dr_user_word_offset(dr), (void*)reg.addr.as_int())) { return false; } switch (dr++) { #define CASE_ENABLE_DR(_dr7, _i, _reg) \ case _i: \ _dr7.dr##_i##_local = 1; \ _dr7.dr##_i##_type = _reg.type; \ _dr7.dr##_i##_len = num_bytes_to_dr_len(_reg.num_bytes); \ break CASE_ENABLE_DR(dr7, 0, reg); CASE_ENABLE_DR(dr7, 1, reg); CASE_ENABLE_DR(dr7, 2, reg); CASE_ENABLE_DR(dr7, 3, reg); #undef CASE_ENABLE_DR default: FATAL() << "There's no debug register " << dr; } } return 0 == fallible_ptrace(PTRACE_POKEUSER, dr_user_word_offset(7), (void*)dr7.packed()); } uintptr_t Task::get_debug_reg(size_t regno) { errno = 0; auto result = fallible_ptrace(PTRACE_PEEKUSER, dr_user_word_offset(regno), nullptr); if (errno == ESRCH) { return 0; } return result; } void Task::set_thread_area(remote_ptr tls) { // We rely on the fact that user_desc is word-size-independent. auto desc = read_mem(tls); for (auto& t : thread_areas_) { if (t.entry_number == desc.entry_number) { t = desc; return; } } thread_areas_.push_back(desc); } void Task::set_tid_addr(remote_ptr tid_addr) { LOG(debug) << "updating cleartid futex to " << tid_addr; tid_futex = tid_addr; } void Task::signal_delivered(int sig) { Sighandler& h = sighandlers->get(sig); if (h.resethand) { reset_handler(&h, arch()); } if (!h.ignored(sig)) { switch (sig) { case SIGSTOP: case SIGTSTP: case SIGTTIN: case SIGTTOU: // All threads in the process are stopped. for (Task* t : tg->task_set()) { LOG(debug) << "setting " << tid << " to GROUP_STOP due to signal " << sig; t->emulated_stop_type = GROUP_STOP; } break; case SIGCONT: // All threads in the process are resumed. for (Task* t : tg->task_set()) { LOG(debug) << "setting " << tid << " to NOT_STOPPED due to signal " << sig; t->emulated_stop_type = NOT_STOPPED; } break; } } send_synthetic_SIGCHLD_if_necessary(); } bool Task::signal_has_user_handler(int sig) const { return sighandlers->get(sig).is_user_handler(); } remote_code_ptr Task::get_signal_user_handler(int sig) const { return sighandlers->get(sig).get_user_handler(); } const vector& Task::signal_action(int sig) const { return sighandlers->get(sig).sa; } bool Task::signal_handler_takes_siginfo(int sig) const { return sighandlers->get(sig).takes_siginfo; } void Task::stash_sig() { int sig = pending_sig(); ASSERT(this, sig); // Callers should avoid passing SYSCALLBUF_DESCHED_SIGNAL in here. ASSERT(this, sig != SYSCALLBUF_DESCHED_SIGNAL); // multiple non-RT signals coalesce if (sig < SIGRTMIN) { for (auto it = stashed_signals.begin(); it != stashed_signals.end(); ++it) { if (it->si_signo == sig) { LOG(debug) << "discarding stashed signal " << sig << " since we already have one pending"; return; } } } const siginfo_t& si = get_siginfo(); stashed_signals.push_back(si); wait_status = 0; } void Task::stash_synthetic_sig(const siginfo_t& si) { int sig = si.si_signo; assert(sig); // Callers should avoid passing SYSCALLBUF_DESCHED_SIGNAL in here. 
assert(sig != SYSCALLBUF_DESCHED_SIGNAL); // multiple non-RT signals coalesce if (sig < SIGRTMIN) { for (auto it = stashed_signals.begin(); it != stashed_signals.end(); ++it) { if (it->si_signo == sig) { LOG(debug) << "discarding stashed signal " << sig << " since we already have one pending"; return; } } } stashed_signals.push_back(si); } void Task::pop_stash_sig() { assert(has_stashed_sig()); stashed_signals.pop_front(); } siginfo_t Task::peek_stash_sig() { assert(has_stashed_sig()); return stashed_signals.front(); } void Task::save_ptrace_signal_siginfo(const siginfo_t& si) { for (auto it = saved_ptrace_siginfos.begin(); it != saved_ptrace_siginfos.end(); ++it) { if (it->si_signo == si.si_signo) { saved_ptrace_siginfos.erase(it); break; } } saved_ptrace_siginfos.push_back(si); } siginfo_t Task::take_ptrace_signal_siginfo(int sig) { for (auto it = saved_ptrace_siginfos.begin(); it != saved_ptrace_siginfos.end(); ++it) { if (it->si_signo == sig) { siginfo_t si = *it; saved_ptrace_siginfos.erase(it); return si; } } siginfo_t si; memset(&si, 0, sizeof(si)); si.si_signo = sig; return si; } const string& Task::trace_dir() const { const TraceStream* trace = trace_stream(); ASSERT(this, trace) << "Trace directory not available"; return trace->dir(); } uint32_t Task::trace_time() const { const TraceStream* trace = trace_stream(); return trace ? trace->time() : 0; } void Task::update_prname(remote_ptr child_addr) { struct prname_buf { char chars[16]; }; auto name = read_mem(child_addr.cast()); name.chars[sizeof(name.chars) - 1] = '\0'; prname = name.chars; } template void Task::update_sigaction_arch(const Registers& regs) { int sig = regs.arg1_signed(); remote_ptr new_sigaction = regs.arg2(); if (0 == regs.syscall_result() && !new_sigaction.is_null()) { // A new sighandler was installed. Update our // sighandler table. // TODO: discard attempts to handle or ignore signals // that can't be by POSIX typename Arch::kernel_sigaction sa; size_t sigset_size = min(sizeof(typename Arch::sigset_t), regs.arg4()); memset(&sa, 0, sizeof(sa)); read_bytes_helper( new_sigaction, sizeof(sa) - (sizeof(typename Arch::sigset_t) - sigset_size), &sa); sighandlers->get(sig).init_arch(sa); } } void Task::update_sigaction(const Registers& regs) { RR_ARCH_FUNCTION(update_sigaction_arch, regs.arch(), regs); } void Task::update_sigmask(const Registers& regs) { int how = regs.arg1_signed(); remote_ptr setp = regs.arg2(); if (regs.syscall_failed() || !setp) { return; } auto set = read_mem(setp); // Update the blocked signals per |how|. switch (how) { case SIG_BLOCK: blocked_sigs |= set; break; case SIG_UNBLOCK: blocked_sigs &= ~set; break; case SIG_SETMASK: blocked_sigs = set; break; default: FATAL() << "Unknown sigmask manipulator " << how; } } static bool is_zombie_process(pid_t pid) { char buf[1000]; sprintf(buf, "/proc/%d/status", pid); FILE* f = fopen(buf, "r"); if (!f) { // Something went terribly wrong. Just say it's a zombie // so we treat it as dead. return true; } static const char state_keyword[] = "State:"; while (fgets(buf, sizeof(buf), f)) { if (strncmp(buf, state_keyword, sizeof(state_keyword) - 1) == 0) { fclose(f); char* b = buf + sizeof(state_keyword) - 1; while (*b == ' ' || *b == '\t') { ++b; } return *b == 'Z'; } } fclose(f); // Something went terribly wrong. Just say it's a zombie // so we treat it as dead. return true; } static bool is_signal_triggered_by_ptrace_interrupt(int sig) { switch (sig) { case SIGTRAP: // We sometimes see SIGSTOP at interrupts, though the // docs don't mention that. 
case SIGSTOP: // We sometimes see 0 too... case 0: return true; default: return false; } } // This function doesn't really need to do anything. The signal will cause // waitpid to return EINTR and that's all we need. static void handle_alarm_signal(int sig) { LOG(debug) << "SIGALRM fired; maybe runaway tracee"; } static const int ptrace_exit_wait_status = (PTRACE_EVENT_EXIT << 16) | 0x857f; void Task::wait(AllowInterrupt allow_interrupt) { LOG(debug) << "going into blocking waitpid(" << tid << ") ..."; ASSERT(this, !unstable) << "Don't wait for unstable tasks"; // We only need this during recording. If tracees go runaway // during replay, something else is at fault. bool enable_wait_interrupt = session().is_recording(); int status; bool sent_wait_interrupt = false; pid_t ret; while (true) { if (enable_wait_interrupt) { // Where does the 3 seconds come from? No especially // good reason. We want this to be pretty high, // because it's a last-ditch recovery mechanism, not a // primary thread scheduler. Though in theory the // PTRACE_INTERRUPT's shouldn't interfere with other // events, that's hard to test thoroughly so try to // avoid it. alarm(3); } ret = waitpid(tid, &status, __WALL); if (enable_wait_interrupt) { alarm(0); } if (ret >= 0 || errno != EINTR) { // waitpid was not interrupted by the alarm. break; } if (is_zombie_process(tg->real_tgid)) { // The process is dead. We must stop waiting on it now // or we might never make progress. // XXX it's not clear why the waitpid() syscall // doesn't return immediately in this case, but in // some cases it doesn't return normally at all! // Fake a PTRACE_EVENT_EXIT for this task. status = ptrace_exit_wait_status; ret = tid; // XXX could this leave unreaped zombies lying around? break; } if (!sent_wait_interrupt && allow_interrupt == ALLOW_INTERRUPT) { ptrace_if_alive(PTRACE_INTERRUPT, nullptr, nullptr); sent_wait_interrupt = true; } } if (ret >= 0 && !stopped_from_status(status)) { // Unexpected non-stopping exit code returned in wait_status. // This shouldn't happen; a PTRACE_EXIT_EVENT for this task // should be observed first, and then we would kill the task // before wait()ing again, so we'd only see the exit // code in detach_and_reap. But somehow we see it here in // grandchild_threads and async_kill_with_threads tests (and // maybe others), when a PTRACE_EXIT_EVENT has not been sent. // Verify that we have not actually seen a PTRACE_EXIT_EVENT. ASSERT(this, !seen_ptrace_exit_event) << "A PTRACE_EXIT_EVENT was observed " "for this task, but somehow " "forgotten"; // Turn this into a PTRACE_EXIT_EVENT. status = ptrace_exit_wait_status; } LOG(debug) << " waitpid(" << tid << ") returns " << ret << "; status " << HEX(status); ASSERT(this, tid == ret) << "waitpid(" << tid << ") failed with " << ret; // If some other ptrace-stop happened to race with our // PTRACE_INTERRUPT, then let the other event win. We only // want to interrupt tracees stuck running in userspace. // We convert the ptrace-stop to a reschedule signal. if (sent_wait_interrupt && PTRACE_EVENT_STOP == ptrace_event_from_status(status) && is_signal_triggered_by_ptrace_interrupt(WSTOPSIG(status))) { LOG(warn) << "Forced to PTRACE_INTERRUPT tracee"; // Starve the runaway task of CPU time. It just got // the equivalent of hundreds of time slices. 
succ_event_counter = numeric_limits::max() / 2; status = (PerfCounters::TIME_SLICE_SIGNAL << 8) | 0x7f; siginfo_t si; memset(&si, 0, sizeof(si)); si.si_signo = PerfCounters::TIME_SLICE_SIGNAL; si.si_fd = hpc.ticks_fd(); si.si_code = POLL_IN; did_waitpid(status, &si); return; } if (sent_wait_interrupt) { LOG(warn) << " PTRACE_INTERRUPT raced with another event " << HEX(status); } did_waitpid(status); } static bool is_in_non_sigreturn_exit_syscall(Task* t) { return t->stop_sig() == (SIGTRAP | 0x80) && (!t->ev().is_syscall_event() || !is_sigreturn(t->ev().Syscall().number, t->arch())); } /** * Call this when we've trapped in a syscall (entry or exit) in the kernel, * to normalize registers. */ static void fixup_syscall_registers(Registers& registers) { if (registers.arch() == x86_64) { // x86-64 'syscall' instruction copies RFLAGS to R11 on syscall entry. // If we single-stepped into the syscall instruction, the TF flag will be // set in R11. We don't want the value in R11 to depend on whether we // were single-stepping during record or replay, possibly causing // divergence. // This doesn't matter when exiting a sigreturn syscall, since it // restores the original flags. // For untraced syscalls, the untraced-syscall entry point code (see // write_rr_page) does this itself. // We tried just clearing %r11, but that seemed to cause hangs in // Ubuntu/Debian kernels for some unknown reason. registers.set_r11(registers.r11() & ~X86_TF_FLAG); // x86-64 'syscall' instruction copies return address to RCX on syscall // entry. rr-related kernel activity normally sets RCX to -1 at some point // during syscall execution, but apparently in some (unknown) situations // probably involving untraced syscalls, that doesn't happen. To avoid // potential issues, forcibly replace RCX with -1 always. // This doesn't matter (and we should not do this) when exiting a // sigreturn syscall, since it will restore the original RCX and we don't // want to clobber that. // For untraced syscalls, the untraced-syscall entry point code (see // write_rr_page) does this itself. registers.set_cx((intptr_t)-1); // On kernel 3.13.0-68-generic #111-Ubuntu SMP we have observed a failed // execve() clearing all flags during recording. During replay we emulate // the exec so this wouldn't happen. Just reset all flags so everything's // consistent. // 0x246 is ZF+PF+IF+reserved, the result clearing a register using // "xor reg, reg". registers.set_flags(0x246); } else if (registers.arch() == x86) { // The x86 SYSENTER handling in Linux modifies EBP and EFLAGS on entry. // EBP is the potential sixth syscall parameter, stored on the user stack. // The EFLAGS changes are described here: // http://linux-kernel.2935.n7.nabble.com/ia32-sysenter-target-does-not-preserve-EFLAGS-td1074164.html // In a VMWare guest, the modifications to EFLAGS appear to be // nondeterministic. Cover that up by setting EFLAGS to reasonable values // now. registers.set_flags(0x246); } } void Task::emulate_syscall_entry(const Registers& regs) { Registers r = regs; if (r.arch() == x86_64) { r.set_r11(r.flags()); } fixup_syscall_registers(r); set_regs(r); } void Task::did_waitpid(int status, siginfo_t* override_siginfo) { Ticks more_ticks = hpc.read_ticks(); // Stop PerfCounters ASAP to reduce the possibility that due to bugs or // whatever they pick up something spurious later. 
hpc.stop(); ticks += more_ticks; session().accumulate_ticks_processed(more_ticks); LOG(debug) << " (refreshing register cache)"; intptr_t original_syscallno = registers.original_syscallno(); // Skip reading registers immediately after a PTRACE_EVENT_EXEC, since // we may not know the correct architecture. if (ptrace_event() != PTRACE_EVENT_EXEC) { struct user_regs_struct ptrace_regs; if (ptrace_if_alive(PTRACE_GETREGS, nullptr, &ptrace_regs)) { registers.set_from_ptrace(ptrace_regs); } else { status = ptrace_exit_wait_status; } } if (pending_sig_from_status(status)) { if (override_siginfo) { pending_siginfo = *override_siginfo; } else { if (!ptrace_if_alive(PTRACE_GETSIGINFO, nullptr, &pending_siginfo)) { status = ptrace_exit_wait_status; } } } is_stopped = true; wait_status = status; if (ptrace_event() == PTRACE_EVENT_EXIT) { seen_ptrace_exit_event = true; } bool need_to_set_regs = false; if (registers.singlestep_flag()) { registers.clear_singlestep_flag(); need_to_set_regs = true; } if (breakpoint_set_where_execution_resumed && pending_sig() == SIGTRAP && !ptrace_event()) { ASSERT(this, more_ticks == 0); // When we resume execution and immediately hit a breakpoint, the original // syscall number can be reset to -1. Undo that, so that the register // state matches the state we'd be in if we hadn't resumed. ReplayTimeline // depends on resume-at-a-breakpoint being a noop. registers.set_original_syscallno(original_syscallno); need_to_set_regs = true; } // When exiting a syscall, We need to normalize nondeterministic registers. if (is_in_non_sigreturn_exit_syscall(this)) { fixup_syscall_registers(registers); need_to_set_regs = true; } if (need_to_set_regs) { set_regs(registers); } } bool Task::try_wait() { int status; pid_t ret = waitpid(tid, &status, WNOHANG | __WALL | WSTOPPED); LOG(debug) << "waitpid(" << tid << ", NOHANG) returns " << ret << ", status " << HEX(wait_status); ASSERT(this, 0 <= ret) << "waitpid(" << tid << ", NOHANG) failed with " << ret; if (ret == tid) { did_waitpid(status); return true; } return false; } /** * Prepare this process and its ancestors for recording/replay by * preventing direct access to sources of nondeterminism, and ensuring * that rr bugs don't adversely affect the underlying system. */ static void set_up_process(Session& session) { /* TODO tracees can probably undo some of the setup below * ... */ /* CLOEXEC so that the original fd here will be closed by the exec that's * about to happen. */ int fd = open("/dev/null", O_WRONLY | O_CLOEXEC); if (0 > fd) { FATAL() << "error opening /dev/null"; } if (RR_MAGIC_SAVE_DATA_FD != dup2(fd, RR_MAGIC_SAVE_DATA_FD)) { FATAL() << "error duping to RR_MAGIC_SAVE_DATA_FD"; } /* CLOEXEC so that the original fd here will be closed by the exec that's * about to happen. */ fd = open("/", O_PATH | O_DIRECTORY | O_CLOEXEC); if (0 > fd) { FATAL() << "error opening root directory"; } if (RR_RESERVED_ROOT_DIR_FD != dup2(fd, RR_RESERVED_ROOT_DIR_FD)) { FATAL() << "error duping to RR_RESERVED_ROOT_DIR_FD"; } if (session.is_replaying()) { // This task and all its descendants should silently reap any terminating // children. signal(SIGCHLD, SIG_IGN); // If the rr process dies, prevent runaway tracee processes // from dragging down the underlying system. // // TODO: this isn't inherited across fork(). if (0 > prctl(PR_SET_PDEATHSIG, SIGKILL)) { FATAL() << "Couldn't set parent-death signal"; } // Put the replaying processes into their own session. 
This will stop // signals being sent to these processes by the terminal --- in particular // SIGTSTP/SIGINT/SIGWINCH. setsid(); } /* Trap to the rr process if a 'rdtsc' instruction is issued. * That allows rr to record the tsc and replay it * deterministically. */ if (0 > prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0)) { FATAL() << "error setting up prctl"; } if (0 > prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { FATAL() << "prctl(NO_NEW_PRIVS) failed, SECCOMP_FILTER is not available: your " "kernel is too old. Use `record -n` to disable the filter."; } } /** * This is called (and must be called) in the tracee after rr has taken * ptrace control. Otherwise, once we've installed the seccomp filter, * things go wrong because we have no ptracer and the seccomp filter demands * one. */ static void set_up_seccomp_filter(Session& session) { struct sock_fprog prog; if (session.is_recording() && session.as_record()->use_syscall_buffer()) { uintptr_t in_untraced_syscall_ip = AddressSpace::rr_page_ip_in_untraced_syscall().register_value(); uintptr_t in_untraced_replayed_syscall_ip = AddressSpace::rr_page_ip_in_untraced_replayed_syscall() .register_value(); uintptr_t privileged_in_untraced_syscall_ip = AddressSpace::rr_page_ip_in_privileged_untraced_syscall() .register_value(); assert(in_untraced_syscall_ip == uint32_t(in_untraced_syscall_ip)); assert(in_untraced_replayed_syscall_ip == uint32_t(in_untraced_replayed_syscall_ip)); assert(privileged_in_untraced_syscall_ip == uint32_t(privileged_in_untraced_syscall_ip)); struct sock_filter filter[] = { /* Allow all system calls from our untraced_syscall callsite */ ALLOW_SYSCALLS_FROM_CALLSITE(uint32_t(in_untraced_syscall_ip)), /* Allow all system calls from our untraced_syscall callsite */ ALLOW_SYSCALLS_FROM_CALLSITE(uint32_t(in_untraced_replayed_syscall_ip)), /* Allow all system calls from our privilged_untraced_syscall callsite */ ALLOW_SYSCALLS_FROM_CALLSITE(uint32_t(privileged_in_untraced_syscall_ip)), /* All the rest are handled in rr */ TRACE_PROCESS, }; prog.len = (unsigned short)(sizeof(filter) / sizeof(filter[0])); prog.filter = filter; } else { // Use a dummy filter that always generates ptrace traps. Supplying this // dummy filter makes ptrace-event behavior consistent whether or not // we enable syscall buffering, and more importantly, consistent whether // or not the tracee installs its own seccomp filter. struct sock_filter filter[] = { TRACE_PROCESS, }; prog.len = (unsigned short)(sizeof(filter) / sizeof(filter[0])); prog.filter = filter; } /* Note: the filter is installed only for record. This call * will be emulated in the replay */ if (0 > prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, (uintptr_t)&prog, 0, 0)) { FATAL() << "prctl(SECCOMP) failed, SECCOMP_FILTER is not available: your " "kernel is too old."; } /* anything that happens from this point on gets filtered! */ } int Task::pending_sig_from_status(int status) const { if (status == 0) { return 0; } int sig = stop_sig_from_status(status); switch (sig) { case SIGTRAP | 0x80: /* We ask for PTRACE_O_TRACESYSGOOD, so this was a * trap for a syscall. Pretend like it wasn't a * signal. */ return 0; case SIGTRAP: /* For a "normal" SIGTRAP, it's a ptrace trap if * there's a ptrace event. If so, pretend like we * didn't get a signal. Otherwise it was a genuine * TRAP signal raised by something else (most likely a * debugger breakpoint). */ return ptrace_event_from_status(status) ? 0 : SIGTRAP; default: /* XXX do we really get the high bit set on some * SEGVs? 
*/ return sig & ~0x80; } } int Task::stop_sig_from_status(int status) const { ASSERT(const_cast(this), stopped_from_status(status)); return WSTOPSIG(status); } template static void set_thread_area_from_clone_arch(Task* t, remote_ptr tls) { if (Arch::clone_tls_type == Arch::UserDescPointer) { t->set_thread_area(tls.cast()); } } static void set_thread_area_from_clone(Task* t, remote_ptr tls) { RR_ARCH_FUNCTION(set_thread_area_from_clone_arch, t->arch(), t, tls); } Task* Task::clone(int flags, remote_ptr stack, remote_ptr tls, remote_ptr cleartid_addr, pid_t new_tid, pid_t new_rec_tid, uint32_t new_serial, Session* other_session) { auto& sess = other_session ? *other_session : session(); Task* t = new Task(sess, new_tid, new_rec_tid, new_serial, priority, arch()); t->blocked_sigs = blocked_sigs; t->prctl_seccomp_status = prctl_seccomp_status; if (CLONE_SHARE_SIGHANDLERS & flags) { t->sighandlers = sighandlers; } else { auto sh = sighandlers->clone(); t->sighandlers.swap(sh); } if (CLONE_SHARE_TASK_GROUP & flags) { t->tg = tg; } else { t->tg = sess.clone(t, tg); } t->tg->insert_task(t); if (CLONE_SHARE_VM & flags) { t->as = as; if (!stack.is_null()) { remote_ptr last_stack_byte = stack - 1; if (t->as->has_mapping(last_stack_byte)) { auto mapping = t->as->mapping_of(last_stack_byte); if (!mapping.recorded_map.is_heap()) { const KernelMapping& m = mapping.map; LOG(debug) << "mapping stack for " << new_tid << " at " << m; t->as->map(m.start(), m.size(), m.prot(), m.flags(), m.file_offset_bytes(), "[stack]", m.device(), m.inode()); } } } } else { t->as = sess.clone(t, as); } t->syscallbuf_fds_disabled_child = syscallbuf_fds_disabled_child; t->stopping_breakpoint_table = stopping_breakpoint_table; t->stopping_breakpoint_table_entry_size = stopping_breakpoint_table_entry_size; // FdTable is either shared or copied, so the contents of // syscallbuf_fds_disabled_child are still valid. if (CLONE_SHARE_FILES & flags) { t->fds = fds; t->fds->insert_task(t); } else { t->fds = fds->clone(t); } t->top_of_stack = stack; // Clone children, both thread and fork, inherit the parent // prname. t->prname = prname; if (CLONE_CLEARTID & flags) { LOG(debug) << "cleartid futex is " << cleartid_addr; assert(!cleartid_addr.is_null()); t->tid_futex = cleartid_addr; } else { LOG(debug) << "(clone child not enabling CLEARTID)"; } // wait() before trying to do anything that might need to // use ptrace to access memory t->wait(); t->open_mem_fd_if_needed(); t->thread_areas_ = thread_areas_; if (CLONE_SET_TLS & flags) { set_thread_area_from_clone(t, tls); } t->as->insert_task(t); if (!(CLONE_SHARE_VM & flags) && &session() == &t->session()) { as->did_fork_into(t); if (!syscallbuf_child.is_null()) { AutoRemoteSyscalls remote(t); // Unshare the syscallbuf memory so when we lock it below, we don't // also lock it in the task we cloned from! int prot = PROT_READ | PROT_WRITE; int flags = MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS; remote.infallible_mmap_syscall(syscallbuf_child, num_syscallbuf_bytes, prot, flags, -1, 0); t->vm()->map(syscallbuf_child, num_syscallbuf_bytes, prot, flags, 0, string(), KernelMapping::NO_DEVICE, KernelMapping::NO_INODE); // Mark the clone's syscallbuf as locked. This will prevent the // clone using syscallbuf until the clone reinitializes the // the buffer via its pthread_atfork handler. Otherwise the clone may // log syscalls to its copy of the syscallbuf and we won't know about // them since we don't have it mapped. // In some cases (e.g. 
vfork(), or raw SYS_fork syscall) the // pthread_atfork handler will never run. Syscallbuf will be permanently // disabled but that's OK, those cases are rare (and in the case of vfork, // tracees should immediately exit or exec anyway). t->write_mem(REMOTE_PTR_FIELD(syscallbuf_child, locked), uint8_t(1)); if (CLONE_SHARE_FILES & flags) { // Clear our desched_fd_child so that we don't try to close it. // It should only be closed in |this|. t->desched_fd_child = -1; } } } return t; } Task* Task::os_fork_into(Session* session) { AutoRemoteSyscalls remote(this); Task* child = os_clone(this, session, remote, rec_tid, serial, // Most likely, we'll be setting up a // CLEARTID futex. That's not done // here, but rather later in // |copy_state()|. // // We also don't use any of the SETTID // flags because that earlier work will // be copied by fork()ing the address // space. SIGCHLD); // When we forked ourselves, the child inherited the setup we // did to make the clone() call. So we have to "finish" the // remote calls (i.e. undo fudged state) in the child too, // even though we never made any syscalls there. remote.restore_state_to(child); return child; } Task* Task::os_clone_into(const CapturedState& state, Task* task_leader, AutoRemoteSyscalls& remote) { return os_clone(task_leader, &task_leader->session(), remote, state.rec_tid, state.serial, // We don't actually /need/ to specify the // SIGHAND/SYSVMEM flags because those things // are emulated in the tracee. But we use the // same flags as glibc to be on the safe side // wrt kernel bugs. // // We don't pass CLONE_SETTLS here *only* // because we'll do it later in // |copy_state()|. // // See |os_fork_into()| above for discussion // of the CTID flags. (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM), state.top_of_stack); } template static void copy_tls_arch(const Task::CapturedState& state, AutoRemoteSyscalls& remote) { if (Arch::clone_tls_type == Arch::UserDescPointer) { for (const struct user_desc& t : state.thread_areas) { AutoRestoreMem remote_tls(remote, (const uint8_t*)&t, sizeof(t)); LOG(debug) << " setting tls " << remote_tls.get(); remote.infallible_syscall( syscall_number_for_set_thread_area(remote.arch()), remote_tls.get().as_int()); } } } static void copy_tls(const Task::CapturedState& state, AutoRemoteSyscalls& remote) { RR_ARCH_FUNCTION(copy_tls_arch, remote.arch(), state, remote); } Task::CapturedState Task::capture_state() { CapturedState state; state.rec_tid = rec_tid; state.serial = serial; state.regs = regs(); state.extra_regs = extra_regs(); state.prname = prname; state.robust_futex_list = robust_futex_list; state.robust_futex_list_len = robust_futex_list_len; state.thread_areas = thread_areas_; state.num_syscallbuf_bytes = num_syscallbuf_bytes; state.desched_fd_child = desched_fd_child; state.syscallbuf_child = syscallbuf_child; if (syscallbuf_hdr) { size_t data_size = syscallbuf_data_size(); if (syscallbuf_hdr->locked) { // There may be an incomplete syscall record after num_rec_bytes that // we need to capture here. We don't know how big that record is, // so just record the entire buffer. This should not be common. 
data_size = num_syscallbuf_bytes; } state.syscallbuf_hdr.resize(data_size); memcpy(state.syscallbuf_hdr.data(), syscallbuf_hdr, state.syscallbuf_hdr.size()); } state.syscallbuf_fds_disabled_child = syscallbuf_fds_disabled_child; state.scratch_ptr = scratch_ptr; state.scratch_size = scratch_size; state.wait_status = wait_status; state.blocked_sigs = blocked_sigs; state.pending_events = pending_events; state.ticks = ticks; state.tid_futex = tid_futex; state.top_of_stack = top_of_stack; return state; } void Task::copy_state(const CapturedState& state) { set_regs(state.regs); set_extra_regs(state.extra_regs); { AutoRemoteSyscalls remote(this); { char prname[16]; strncpy(prname, state.prname.c_str(), sizeof(prname)); AutoRestoreMem remote_prname(remote, (const uint8_t*)prname, sizeof(prname)); LOG(debug) << " setting name to " << prname; remote.infallible_syscall(syscall_number_for_prctl(arch()), PR_SET_NAME, remote_prname.get().as_int()); update_prname(remote_prname.get()); } if (!state.robust_futex_list.is_null()) { set_robust_list(state.robust_futex_list, state.robust_futex_list_len); } copy_tls(state, remote); thread_areas_ = state.thread_areas; tid_futex = state.tid_futex; ASSERT(this, !syscallbuf_child) << "Syscallbuf should not already be initialized in clone"; if (!state.syscallbuf_child.is_null()) { // All these fields are preserved by the fork. num_syscallbuf_bytes = state.num_syscallbuf_bytes; desched_fd_child = state.desched_fd_child; // The syscallbuf is mapped as a shared // segment between rr and the tracee. So we // have to unmap it, create a copy, and then // re-map the copy in rr and the tracee. init_syscall_buffer(remote, state.syscallbuf_child); ASSERT(this, state.syscallbuf_child == syscallbuf_child); // Ensure the copied syscallbuf has the same contents // as the old one, for consistency checking. memcpy(syscallbuf_hdr, state.syscallbuf_hdr.data(), state.syscallbuf_hdr.size()); } } syscallbuf_fds_disabled_child = state.syscallbuf_fds_disabled_child; // The scratch buffer (for now) is merely a private mapping in // the remote task. The CoW copy made by fork()'ing the // address space has the semantics we want. It's not used in // replay anyway. scratch_ptr = state.scratch_ptr; scratch_size = state.scratch_size; // Whatever |from|'s last wait status was is what ours would // have been. wait_status = state.wait_status; // These are only metadata that have been inferred from the // series of syscalls made by the trace so far. blocked_sigs = state.blocked_sigs; pending_events = state.pending_events; ticks = state.ticks; } void Task::destroy_local_buffers() { desched_fd.close(); munmap(syscallbuf_hdr, num_syscallbuf_bytes); } long Task::fallible_ptrace(int request, remote_ptr addr, void* data) { return ptrace(__ptrace_request(request), tid, addr, data); } void Task::open_mem_fd() { // Use ptrace to read/write during open_mem_fd as->set_mem_fd(ScopedFd()); // We could try opening /proc//mem directly first and // only do this dance if that fails. But it's simpler to // always take this path, and gives better test coverage. 
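  // The direct approach mentioned above would look roughly like this
  // (editor's sketch, not what the code below does; it can fail for
  // permission reasons, e.g. when the tracee runs in a different user
  // namespace):
  //
  //   char buf[PATH_MAX];
  //   snprintf(buf, sizeof(buf), "/proc/%d/mem", tid);
  //   as->set_mem_fd(ScopedFd(open(buf, O_RDWR)));
  //
  // Instead, the code below makes the tracee open its own /proc/self/mem
  // and transfers that fd back to rr.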
static const char path[] = "/proc/self/mem"; AutoRemoteSyscalls remote(this); long remote_fd; { AutoRestoreMem remote_path(remote, (const uint8_t*)path, sizeof(path)); // skip leading '/' since we want the path to be relative to the root fd remote_fd = remote.infallible_syscall(syscall_number_for_openat(arch()), RR_RESERVED_ROOT_DIR_FD, remote_path.get() + 1, O_RDWR); } as->set_mem_fd(remote.retrieve_fd(remote_fd)); ASSERT(this, as->mem_fd().is_open()); remote.infallible_syscall(syscall_number_for_close(arch()), remote_fd); } void Task::open_mem_fd_if_needed() { if (!as->mem_fd().is_open()) { open_mem_fd(); } } void Task::init_syscall_buffer(AutoRemoteSyscalls& remote, remote_ptr map_hint) { static int nonce = 0; // Create the segment we'll share with the tracee. char path[PATH_MAX]; snprintf(path, sizeof(path) - 1, SYSCALLBUF_SHMEM_PATH_PREFIX "%d-%d", tid, nonce++); // Let the child create the shmem block and then send the fd back to us. // This lets us avoid having to make the file world-writeable so that // the child can read it when it's in a different user namespace (which // would be a security hole, letting other users abuse rr users). int child_shmem_fd; { AutoRestoreMem child_path(remote, path); // skip leading '/' since we want the path to be relative to the root fd child_shmem_fd = remote.infallible_syscall( syscall_number_for_openat(arch()), RR_RESERVED_ROOT_DIR_FD, child_path.get() + 1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0600); } /* Remove the fs name so that we don't have to worry about * cleaning up this segment in error conditions. */ unlink(path); ScopedFd shmem_fd = remote.retrieve_fd(child_shmem_fd); resize_shmem_segment(shmem_fd, SYSCALLBUF_BUFFER_SIZE); LOG(debug) << "created shmem segment " << path; // Map the segment in ours and the tracee's address spaces. void* map_addr; num_syscallbuf_bytes = SYSCALLBUF_BUFFER_SIZE; int prot = PROT_READ | PROT_WRITE; int flags = MAP_SHARED; if ((void*)-1 == (map_addr = mmap(nullptr, num_syscallbuf_bytes, prot, flags, shmem_fd, 0))) { FATAL() << "Failed to mmap shmem region"; } if (!map_hint.is_null()) { flags |= MAP_FIXED; } remote_ptr child_map_addr = remote.infallible_mmap_syscall( map_hint, num_syscallbuf_bytes, prot, flags, child_shmem_fd, 0); ASSERT(this, !syscallbuf_child) << "Should not already have syscallbuf initialized!"; syscallbuf_child = child_map_addr.cast(); syscallbuf_hdr = (struct syscallbuf_hdr*)map_addr; // No entries to begin with. memset(syscallbuf_hdr, 0, sizeof(*syscallbuf_hdr)); struct stat st; ASSERT(this, 0 == ::fstat(shmem_fd, &st)); vm()->map(child_map_addr, num_syscallbuf_bytes, prot, flags, 0, path, st.st_dev, st.st_ino); shmem_fd.close(); remote.infallible_syscall(syscall_number_for_close(arch()), child_shmem_fd); } void Task::tgkill(int sig) { ASSERT(this, 0 == syscall(SYS_tgkill, real_tgid(), tid, sig)); } void Task::reset_syscallbuf() { uint8_t* ptr = (uint8_t*)(syscallbuf_hdr + 1); memset(ptr, 0, syscallbuf_hdr->num_rec_bytes); syscallbuf_hdr->num_rec_bytes = 0; } void Task::maybe_flush_syscallbuf() { if (EV_SYSCALLBUF_FLUSH == ev().type()) { // Already flushing. return; } if (!syscallbuf_hdr) { return; } // This can be called while the task is not stopped, when we prematurely // terminate the trace. In that case, the tracee could be concurrently // modifying the header. We'll take a snapshot of the header now. // The syscallbuf code ensures that writes to syscallbuf records // complete before num_rec_bytes is incremented. 
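  // The invariant described above means the tracee side effectively does
  // (editor's sketch of the ordering, not the literal preload code):
  //
  //   write the record payload;   // all bytes of the new record
  //   memory/compiler barrier;
  //   hdr->num_rec_bytes += record_size;
  //
  // so any snapshot of the header only ever claims fully-written records.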
struct syscallbuf_hdr hdr = *syscallbuf_hdr; ASSERT(this, !flushed_syscallbuf || flushed_num_rec_bytes == hdr.num_rec_bytes); if (!hdr.num_rec_bytes || flushed_syscallbuf) { // no records, or we've already flushed. return; } // Write the entire buffer in one shot without parsing it, // because replay will take care of that. push_event(Event(EV_SYSCALLBUF_FLUSH, NO_EXEC_INFO, arch())); if (is_stopped) { record_local(syscallbuf_child, syscallbuf_data_size(), syscallbuf_hdr); } else { vector buf; buf.resize(sizeof(hdr) + hdr.num_rec_bytes); memcpy(buf.data(), &hdr, sizeof(hdr)); memcpy(buf.data() + sizeof(hdr), syscallbuf_hdr + 1, hdr.num_rec_bytes); record_local(syscallbuf_child, buf.size(), buf.data()); } record_current_event(); pop_event(EV_SYSCALLBUF_FLUSH); flushed_syscallbuf = true; flushed_num_rec_bytes = hdr.num_rec_bytes; LOG(debug) << "Syscallbuf flushed with num_rec_bytes=" << (uint32_t)hdr.num_rec_bytes; } ssize_t Task::read_bytes_ptrace(remote_ptr addr, ssize_t buf_size, void* buf) { ssize_t nread = 0; // ptrace operates on the word size of the host, so we really do want // to use sizes of host types here. uintptr_t word_size = sizeof(long); errno = 0; // Only read aligned words. This ensures we can always read the last // byte before an unmapped region. while (nread < buf_size) { uintptr_t start = addr.as_int() + nread; uintptr_t start_word = start & ~(word_size - 1); uintptr_t end_word = start_word + word_size; uintptr_t length = std::min(end_word - start, uintptr_t(buf_size - nread)); long v = fallible_ptrace(PTRACE_PEEKDATA, start_word, nullptr); if (errno) { break; } memcpy(static_cast(buf) + nread, reinterpret_cast(&v) + (start - start_word), length); nread += length; } return nread; } ssize_t Task::write_bytes_ptrace(remote_ptr addr, ssize_t buf_size, const void* buf) { ssize_t nwritten = 0; // ptrace operates on the word size of the host, so we really do want // to use sizes of host types here. uintptr_t word_size = sizeof(long); errno = 0; // Only write aligned words. This ensures we can always write the last // byte before an unmapped region. while (nwritten < buf_size) { uintptr_t start = addr.as_int() + nwritten; uintptr_t start_word = start & ~(word_size - 1); uintptr_t end_word = start_word + word_size; uintptr_t length = std::min(end_word - start, uintptr_t(buf_size - nwritten)); long v; if (length < word_size) { v = fallible_ptrace(PTRACE_PEEKDATA, start_word, nullptr); if (errno) { break; } } memcpy(reinterpret_cast(&v) + (start - start_word), static_cast(buf) + nwritten, length); fallible_ptrace(PTRACE_POKEDATA, start_word, reinterpret_cast(v)); nwritten += length; } return nwritten; } ssize_t Task::read_bytes_fallible(remote_ptr addr, ssize_t buf_size, void* buf) { ASSERT(this, buf_size >= 0) << "Invalid buf_size " << buf_size; if (0 == buf_size) { return 0; } if (!as->mem_fd().is_open()) { return read_bytes_ptrace(addr, buf_size, buf); } ssize_t all_read = 0; while (all_read < buf_size) { errno = 0; ssize_t nread = pread64(as->mem_fd(), static_cast(buf) + all_read, buf_size - all_read, addr.as_int() + all_read); // We open the mem_fd just after being notified of // exec(), when the Task is created. Trying to read from that // fd seems to return 0 with errno 0. Reopening the mem fd // allows the pwrite to succeed. It seems that the first mem // fd we open, very early in exec, refers to some resource // that's different than the one we see after reopening the // fd, after exec. 
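    // Sketch of the sequence described above (editor's illustration):
    //
    //   pread64(mem_fd, buf, n, addr) == 0, errno == 0  // stale fd opened
    //                                                   // early in exec
    //   open_mem_fd();                                  // reopen via tracee
    //   pread64(mem_fd, buf, n, addr) > 0               // now succeeds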
if (0 == nread && 0 == all_read && 0 == errno) { open_mem_fd(); continue; } if (nread <= 0) { if (all_read > 0) { // We did successfully read some data, so return success and ignore // any error. errno = 0; return all_read; } return nread; } // We read some data. We should try again in case we get short reads. all_read += nread; } return all_read; } void Task::read_bytes_helper(remote_ptr addr, ssize_t buf_size, void* buf, bool* ok) { // pread64 etc can't handle addresses that appear to be negative ... // like [vsyscall]. ssize_t nread = read_bytes_fallible(addr, buf_size, buf); if (nread != buf_size) { if (ok) { *ok = false; } else { ASSERT(this, false) << "Should have read " << buf_size << " bytes from " << addr << ", but only read " << nread; } } } bool Task::try_replace_pages(remote_ptr addr, ssize_t buf_size, const void* buf) { // Check that there are private-mapping pages covering the destination area. // The pages must all have the same prot and flags. uintptr_t page_size = sysconf(_SC_PAGESIZE); uintptr_t page_start = addr.as_int() & ~(page_size - 1); uintptr_t page_end = (addr.as_int() + buf_size + page_size - 1) & ~(page_size - 1); int all_prot, all_flags; for (uintptr_t p = page_start; p < page_end; p += page_size) { const KernelMapping& m = as->mapping_of(p).map; if (p > page_start) { if (all_prot != m.prot() || all_flags != m.flags()) { return false; } } else { all_prot = m.prot(); all_flags = m.flags(); } } if (!(all_flags & MAP_PRIVATE)) { return false; } auto cur = read_mem(remote_ptr(page_start), page_end - page_start); // XXX share this with AddressSpace.cc char path[] = "/tmp/rr-replaced-pages-XXXXXX"; ScopedFd fd(mkstemp(path)); ASSERT(this, fd.is_open()); ssize_t nwritten = write(fd, cur.data(), cur.size()); ASSERT(this, nwritten == (ssize_t)cur.size()); nwritten = pwrite(fd, buf, buf_size, addr.as_int() - page_start); ASSERT(this, nwritten == buf_size); AutoRemoteSyscalls remote(this); SupportedArch a = arch(); AutoRestoreMem child_path(remote, reinterpret_cast(path), sizeof(path)); // skip leading '/' since we want the path to be relative to the root fd int child_fd = remote.infallible_syscall(syscall_number_for_openat(a), RR_RESERVED_ROOT_DIR_FD, child_path.get() + 1, O_RDWR); ASSERT(this, child_fd >= 0); // Just map the new file right over the top of existing pages remote.infallible_mmap_syscall(page_start, cur.size(), all_prot, all_flags | MAP_FIXED, child_fd, 0); remote.infallible_syscall(syscall_number_for_close(a), child_fd); unlink(path); return true; } /** * This function exists to work around * https://bugzilla.kernel.org/show_bug.cgi?id=99101. * On some kernels pwrite() to /proc/.../mem fails when writing to a region * that's PROT_NONE. 
*/ static ssize_t safe_pwrite64(Task* t, const void* buf, ssize_t buf_size, remote_ptr addr) { vector mappings_to_fix; for (auto m : t->vm()->maps_starting_at(floor_page_size(addr))) { if (m.map.start() >= ceil_page_size(addr + buf_size)) { break; } if (!(m.map.prot() & (PROT_READ | PROT_WRITE))) { mappings_to_fix.push_back(m.map); } }; if (mappings_to_fix.empty()) { return pwrite64(t->vm()->mem_fd(), buf, buf_size, addr.as_int()); } AutoRemoteSyscalls remote(t); int mprotect_syscallno = syscall_number_for_mprotect(t->arch()); for (auto& m : mappings_to_fix) { remote.infallible_syscall(mprotect_syscallno, m.start(), m.size(), m.prot() | PROT_WRITE); } ssize_t nwritten = pwrite64(t->vm()->mem_fd(), buf, buf_size, addr.as_int()); for (auto& m : mappings_to_fix) { remote.infallible_syscall(mprotect_syscallno, m.start(), m.size(), m.prot()); } return nwritten; } void Task::write_bytes_helper(remote_ptr addr, ssize_t buf_size, const void* buf, bool* ok) { ASSERT(this, buf_size >= 0) << "Invalid buf_size " << buf_size; if (0 == buf_size) { return; } if (!as->mem_fd().is_open()) { ssize_t nwritten = write_bytes_ptrace(addr, buf_size, buf); if (nwritten > 0) { vm()->notify_written(addr, nwritten); } if (ok && nwritten < buf_size) { *ok = false; } return; } errno = 0; ssize_t nwritten = safe_pwrite64(this, buf, buf_size, addr.as_int()); // See comment in read_bytes_helper(). if (0 == nwritten && 0 == errno) { open_mem_fd(); return write_bytes_helper(addr, buf_size, buf, ok); } if (errno == EPERM && try_replace_pages(addr, buf_size, buf)) { // Maybe a PaX kernel and we're trying to write to an executable page. vm()->notify_written(addr, buf_size); return; } if (ok) { if (nwritten < buf_size) { *ok = false; } } else { ASSERT(this, nwritten == buf_size) << "Should have written " << buf_size << " bytes to " << addr << ", but only wrote " << nwritten; } if (nwritten > 0) { vm()->notify_written(addr, nwritten); } } const TraceStream* Task::trace_stream() const { if (session().as_record()) { return &record_session().trace_writer(); } if (session().as_replay()) { return &replay_session().trace_reader(); } return nullptr; } void Task::xptrace(int request, remote_ptr addr, void* data) { errno = 0; fallible_ptrace(request, addr, data); ASSERT(this, !errno) << "ptrace(" << ptrace_req_name(request) << ", " << tid << ", addr=" << addr << ", data=" << data << ") failed with errno " << errno; } bool Task::ptrace_if_alive(int request, remote_ptr addr, void* data) { errno = 0; fallible_ptrace(request, addr, data); if (errno == ESRCH) { return false; } ASSERT(this, !errno) << "ptrace(" << ptrace_req_name(request) << ", " << tid << ", addr=" << addr << ", data=" << data << ") failed with errno " << errno; return true; } bool Task::clone_syscall_is_complete() { int event = ptrace_event(); if (PTRACE_EVENT_CLONE == event || PTRACE_EVENT_FORK == event) { return true; } ASSERT(this, !event) << "Unexpected ptrace event " << ptrace_event_name(event); // EAGAIN can happen here due to fork failing under load. The caller must // handle this. // XXX ENOSYS shouldn't happen here. 
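  // The caller-side retry contract looks like this (editor's sketch;
  // compare Task::os_clone() below):
  //
  //   while (!t->clone_syscall_is_complete()) {
  //     if (t->regs().syscall_result_signed() == -EAGAIN)
  //       /* re-issue the clone */;
  //     else
  //       t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
  //   }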
intptr_t result = regs().syscall_result_signed(); ASSERT(this, regs().syscall_may_restart() || -ENOSYS == result || -EAGAIN == result || -ENOMEM == result) << "Unexpected task status " << HEX(status()) << " (" << syscall_name(regs().original_syscallno()) << " syscall errno: " << errno_name(-result) << ")"; return false; } template static void do_preload_init_arch(Task* t) { auto params = t->read_mem( remote_ptr >(t->regs().arg1())); remote_ptr syscallbuf_fds_disabled = params.syscallbuf_fds_disabled.rptr(); t->syscallbuf_fds_disabled_child = syscallbuf_fds_disabled.cast(); t->stopping_breakpoint_table = params.breakpoint_table.rptr().as_int(); t->stopping_breakpoint_table_entry_size = params.breakpoint_table_entry_size; t->write_mem(params.in_replay_flag.rptr(), (unsigned char)t->session().is_replaying()); } static void do_preload_init(Task* t) { RR_ARCH_FUNCTION(do_preload_init_arch, t->arch(), t); } void Task::at_preload_init() { do_preload_init(this); fd_table()->init_syscallbuf_fds_disabled(this); } template static void perform_remote_clone_arch( AutoRemoteSyscalls& remote, unsigned base_flags, remote_ptr stack, remote_ptr ptid, remote_ptr tls, remote_ptr ctid) { switch (Arch::clone_parameter_ordering) { case Arch::FlagsStackParentTLSChild: remote.syscall(Arch::clone, base_flags, stack, ptid.as_int(), tls.as_int(), ctid.as_int()); break; case Arch::FlagsStackParentChildTLS: remote.syscall(Arch::clone, base_flags, stack, ptid.as_int(), ctid.as_int(), tls.as_int()); break; } } static void perform_remote_clone(Task* parent, AutoRemoteSyscalls& remote, unsigned base_flags, remote_ptr stack, remote_ptr ptid, remote_ptr tls, remote_ptr ctid) { RR_ARCH_FUNCTION(perform_remote_clone_arch, parent->arch(), remote, base_flags, stack, ptid, tls, ctid); } /*static*/ Task* Task::os_clone(Task* parent, Session* session, AutoRemoteSyscalls& remote, pid_t rec_child_tid, uint32_t new_serial, unsigned base_flags, remote_ptr stack, remote_ptr ptid, remote_ptr tls, remote_ptr ctid) { perform_remote_clone(parent, remote, base_flags, stack, ptid, tls, ctid); while (!parent->clone_syscall_is_complete()) { // clone syscalls can fail with EAGAIN due to temporary load issues. // Just retry the system call until it succeeds. if (parent->regs().syscall_result_signed() == -EAGAIN) { perform_remote_clone(parent, remote, base_flags, stack, ptid, tls, ctid); } else { // XXX account for ReplaySession::is_ignored_signal? 
parent->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS); } } pid_t new_tid = parent->get_ptrace_eventmsg_pid(); parent->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS); Task* child = parent->clone(clone_flags_to_task_flags(base_flags), stack, tls, ctid, new_tid, rec_child_tid, new_serial, session); return child; } static void setup_fd_table(FdTable& fds) { fds.add_monitor(STDOUT_FILENO, new StdioMonitor(STDOUT_FILENO)); fds.add_monitor(STDERR_FILENO, new StdioMonitor(STDERR_FILENO)); fds.add_monitor(RR_MAGIC_SAVE_DATA_FD, new MagicSaveDataMonitor()); fds.add_monitor(RR_RESERVED_ROOT_DIR_FD, new PreserveFileMonitor()); } static void set_cpu_affinity(int cpu) { assert(cpu >= 0); cpu_set_t mask; CPU_ZERO(&mask); CPU_SET(cpu, &mask); if (0 > sched_setaffinity(0, sizeof(mask), &mask)) { FATAL() << "Couldn't bind to CPU " << cpu; } } /*static*/ Task* Task::spawn(Session& session, const TraceStream& trace, pid_t rec_tid) { assert(session.tasks().size() == 0); if (trace.bound_to_cpu() >= 0) { // Set CPU affinity now, after we've created any helper threads // (so they aren't affected), but before we create any // tracees (so they are all affected). // Note that we're binding rr itself to the same CPU as the // tracees, since this seems to help performance. set_cpu_affinity(trace.bound_to_cpu()); } pid_t tid; do { tid = fork(); // fork() can fail with EAGAIN due to temporary load issues. In such // cases, retry the fork(). } while (0 > tid && errno == EAGAIN); if (0 == tid) { // Set current working directory to the cwd used during // recording. The main effect of this is to resolve relative // paths in the following execvpe correctly during replay. chdir(trace.initial_cwd().c_str()); set_up_process(session); // The preceding code must run before sending SIGSTOP here, // since after SIGSTOP replay emulates almost all syscalls, but // we need the above syscalls to run "for real". // Signal to tracer that we're configured. ::kill(getpid(), SIGSTOP); // This code must run after rr has taken ptrace control. set_up_seccomp_filter(session); // We do a small amount of dummy work here to retire // some branches in order to ensure that the ticks value is // non-zero. The tracer can then check the ticks value // at the first ptrace-trap to see if it seems to be // working. int start = random() % 5; int num_its = start + 5; int sum = 0; for (int i = start; i < num_its; ++i) { sum += i; } syscall(SYS_write, -1, &sum, sizeof(sum)); CPUIDBugDetector::run_detection_code(); execvpe(trace.initial_exe().c_str(), StringVectorToCharArray(trace.initial_argv()).get(), StringVectorToCharArray(trace.initial_envp()).get()); // That failed. Try executing the file directly. execve(trace.initial_exe().c_str(), StringVectorToCharArray(trace.initial_argv()).get(), StringVectorToCharArray(trace.initial_envp()).get()); FATAL() << "Failed to exec '" << trace.initial_exe().c_str() << "'"; } if (0 > tid) { FATAL() << "Failed to fork for '" << trace.initial_exe().c_str() << "'"; } struct sigaction sa; sa.sa_handler = handle_alarm_signal; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; // No SA_RESTART, so waitpid() will be interrupted sigaction(SIGALRM, &sa, nullptr); // Sync with the child process. // We minimize the code we run between fork()ing and PTRACE_SEIZE, because // any abnormal exit of the rr process will leave the child paused and // parented by the init process, i.e. effectively leaked. After PTRACE_SEIZE // with PTRACE_O_EXITKILL, the tracee will die if rr dies. 
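  // What the option bits requested below mean (editor's annotation, per
  // ptrace(2)):
  //   PTRACE_O_TRACESYSGOOD: flag syscall stops with bit 0x80 in the signal
  //   PTRACE_O_TRACEFORK/VFORK/CLONE: auto-attach to newly created tasks
  //   PTRACE_O_TRACEEXEC, _TRACEVFORKDONE, _TRACEEXIT: stop at those events
  //   PTRACE_O_EXITKILL: SIGKILL all tracees if rr exits (kernel >= 3.8)
  //   PTRACE_O_TRACESECCOMP: stop at seccomp-bpf SECCOMP_RET_TRACE events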
intptr_t options = PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC | PTRACE_O_TRACEVFORKDONE | PTRACE_O_TRACEEXIT | PTRACE_O_EXITKILL | PTRACE_O_TRACESECCOMP; long ret = ptrace(PTRACE_SEIZE, tid, nullptr, (void*)options); if (ret < 0 && errno == EINVAL) { // PTRACE_O_EXITKILL was added in kernel 3.8, and we only need // it for more robust cleanup, so tolerate not having it. options &= ~PTRACE_O_EXITKILL; ret = ptrace(PTRACE_SEIZE, tid, nullptr, (void*)options); } if (ret) { // Note that although the tracee may have died due to some fatal error, // we haven't reaped its exit code so there's no danger of killing // (or PTRACE_SEIZEing) the wrong process. kill(tid, SIGKILL); FATAL() << "PTRACE_SEIZE failed for tid " << tid; } Task* t = new Task(session, tid, rec_tid, session.next_task_serial(), 0, NativeArch::arch()); // The very first task we fork inherits the signal // dispositions of the current OS process (which should all be // default at this point, but ...). From there on, new tasks // will transitively inherit from this first task. auto sh = Sighandlers::create(); sh->init_from_current_process(); t->sighandlers.swap(sh); // Don't use the POSIX wrapper, because it doesn't necessarily // read the entire sigset tracked by the kernel. if (::syscall(SYS_rt_sigprocmask, SIG_SETMASK, nullptr, &t->blocked_sigs, sizeof(t->blocked_sigs))) { FATAL() << "Failed to read blocked signals"; } auto tg = session.create_tg(t); t->tg.swap(tg); auto as = session.create_vm(t, trace.initial_exe()); t->as.swap(as); t->fds = FdTable::create(t); setup_fd_table(*t->fds); // PTRACE_SEIZE is fundamentally racy by design. We depend on // stopping the tracee at a known location, so raciness is // bad. To resolve the race condition, we just keep running // the tracee until it reaches the known-safe starting point. // // Alternatively, it would be possible to remove the // requirement of the tracing beginning from a known point. while (true) { t->wait(DONT_ALLOW_INTERRUPT); if (SIGSTOP == t->stop_sig()) { break; } t->resume_execution(RESUME_CONT, RESUME_NONBLOCKING, RESUME_UNLIMITED_TICKS); } t->wait_status = 0; t->open_mem_fd(); return t; } string Task::syscall_name(int syscall) const { return ::syscall_name(syscall, arch()); } pid_t Task::find_newborn_thread() { ASSERT(this, session().is_recording()); ASSERT(this, ptrace_event() == PTRACE_EVENT_CLONE); pid_t hint = get_ptrace_eventmsg_pid(); char path[PATH_MAX]; sprintf(path, "/proc/%d/task/%d", tid, hint); struct stat stat_buf; // This should always succeed, but may fail in old kernels due to // a kernel bug. See RecordSession::handle_ptrace_event. 
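  // Concretely (editor's example with made-up ids): after a
  // PTRACE_EVENT_CLONE in task 1000 whose eventmsg says 1007, we expect
  // /proc/1000/task/1007 to exist; if it doesn't, we fall back to scanning
  // /proc/1000/task/ below for a tid we don't already know about.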
if (!session().find_task(hint) && 0 == stat(path, &stat_buf)) { return hint; } sprintf(path, "/proc/%d/task", tid); DIR* dir = opendir(path); ASSERT(this, dir); while (true) { struct dirent* result; struct dirent entry; int ret = readdir_r(dir, &entry, &result); ASSERT(this, !ret && result == &entry); char* end; pid_t thread_tid = strtol(entry.d_name, &end, 10); if (*end == '\0' && !session().find_task(thread_tid)) { closedir(dir); return thread_tid; } } } static bool is_ppid_of(pid_t ppid, pid_t pid) { char path[PATH_MAX]; sprintf(path, "/proc/%d/status", pid); FILE* status = fopen(path, "r"); if (!status) { return false; } while (true) { char line[1024]; if (!fgets(line, sizeof(line), status)) { fclose(status); return false; } if (strncmp(line, "PPid:", 5) == 0) { fclose(status); char* end; int actual_ppid = strtol(line + 5, &end, 10); return *end == '\n' && actual_ppid == ppid; } } } pid_t Task::find_newborn_child_process() { ASSERT(this, session().is_recording()); ASSERT(this, ptrace_event() == PTRACE_EVENT_CLONE || ptrace_event() == PTRACE_EVENT_FORK); pid_t hint = get_ptrace_eventmsg_pid(); // This should always succeed, but may fail in old kernels due to // a kernel bug. See RecordSession::handle_ptrace_event. if (!session().find_task(hint) && is_ppid_of(real_tgid(), hint)) { return hint; } DIR* dir = opendir("/proc"); ASSERT(this, dir); while (true) { struct dirent* result; struct dirent entry; int ret = readdir_r(dir, &entry, &result); ASSERT(this, !ret && result == &entry); char* end; pid_t proc_tid = strtol(entry.d_name, &end, 10); if (*end == '\0' && !session().find_task(proc_tid) && is_ppid_of(real_tgid(), proc_tid)) { closedir(dir); return proc_tid; } } } rr-4.1.0/src/task.h000066400000000000000000001541051265436462100140600ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_TASK_H_ #define RR_TASK_H_ #include #include #include "preload/preload_interface.h" #include "AddressSpace.h" #include "Event.h" #include "ExtraRegisters.h" #include "FdTable.h" #include "kernel_abi.h" #include "kernel_supplement.h" #include "PerfCounters.h" #include "PropertyTable.h" #include "Registers.h" #include "remote_code_ptr.h" #include "TaskishUid.h" #include "TraceStream.h" #include "util.h" class AutoRemoteSyscalls; class RecordSession; class ReplaySession; class ScopedFd; class Session; struct Sighandlers; class Task; struct syscallbuf_hdr; struct syscallbuf_record; /** * A list of return addresses extracted from the stack. The tuple * (perfcounter ticks, regs, return addresses) may be needed to disambiguate * states that aren't unique in (perfcounter ticks, regs). * When return addresses can't be extracted, some suffix of the list may be * all zeroes. */ struct ReturnAddressList { enum { COUNT = 8 }; remote_ptr addresses[COUNT]; bool operator==(const ReturnAddressList& other) const { for (int i = 0; i < COUNT; ++i) { if (addresses[i] != other.addresses[i]) { return false; } } return true; } bool operator!=(const ReturnAddressList& other) const { return !(*this == other); } }; /** * Tracks a group of tasks with an associated ID, set from the * original "thread group leader", the child of |fork()| which became * the ancestor of all other threads in the group. Each constituent * task must own a reference to this. 
*/ class TaskGroup : public HasTaskSet { public: TaskGroup(Session* session, TaskGroup* parent, pid_t tgid, pid_t real_tgid, uint32_t serial); ~TaskGroup(); typedef std::shared_ptr shr_ptr; /** See |Task::destabilize_task_group()|. */ void destabilize(); const pid_t tgid; const pid_t real_tgid; int exit_code; Session* session() const { return session_; } void forget_session() { session_ = nullptr; } TaskGroup* parent() { return parent_; } TaskGroupUid tguid() const { return TaskGroupUid(tgid, serial); } // We don't allow tasks to make themselves undumpable. If they try, // record that here and lie about it if necessary. bool dumpable; private: TaskGroup(const TaskGroup&) = delete; TaskGroup operator=(const TaskGroup&) = delete; Session* session_; /** Parent TaskGroup, or nullptr if it's not a tracee (rr or init). */ TaskGroup* parent_; std::set children; uint32_t serial; }; enum CloneFlags { /** * The child gets a semantic copy of all parent resources (and * becomes a new task group). This is the semantics of the * fork() syscall. */ CLONE_SHARE_NOTHING = 0, /** * Child will share the table of signal dispositions with its * parent. */ CLONE_SHARE_SIGHANDLERS = 1 << 0, /** Child will join its parent's task group. */ CLONE_SHARE_TASK_GROUP = 1 << 1, /** Child will share its parent's address space. */ CLONE_SHARE_VM = 1 << 2, /** Child will share its parent's file descriptor table. */ CLONE_SHARE_FILES = 1 << 3, /** Kernel will clear and notify tid futex on task exit. */ CLONE_CLEARTID = 1 << 4, // Set the thread area to what's specified by the |tls| arg. CLONE_SET_TLS = 1 << 5, }; /** * Enumeration of ways to resume execution. See the ptrace manual for * details of the semantics of these. * * We define a new datatype because the PTRACE_SYSEMU* requests aren't * part of the official ptrace API, and we want to use a strong type * for these resume requests to ensure callers don't confuse their * arguments. */ enum ResumeRequest { RESUME_CONT = PTRACE_CONT, RESUME_SINGLESTEP = PTRACE_SINGLESTEP, RESUME_SYSCALL = PTRACE_SYSCALL, RESUME_SYSEMU = PTRACE_SYSEMU, RESUME_SYSEMU_SINGLESTEP = PTRACE_SYSEMU_SINGLESTEP, }; enum WaitRequest { // After resuming, blocking-waitpid() until tracee status // changes. RESUME_WAIT, // Don't wait after resuming. RESUME_NONBLOCKING }; enum TicksRequest { // We don't expect to see any ticks (though we seem to on the odd buggy // system...). Using this is a small performance optimization because we don't // have to stop and restart the performance counters. This may also avoid // bugs on some systems that report performance counter advances while // in the kernel... RESUME_NO_TICKS = -2, RESUME_UNLIMITED_TICKS = -1 // Positive values are a request for an interrupt // after that number of ticks }; /** Different kinds of waits a task can do. */ enum WaitType { // Not waiting for anything WAIT_TYPE_NONE, // Waiting for any child process WAIT_TYPE_ANY, // Waiting for any child with the same process group ID WAIT_TYPE_SAME_PGID, // Waiting for any child with a specific process group ID WAIT_TYPE_PGID, // Waiting for a specific process ID WAIT_TYPE_PID }; /** Reasons why we simulate stopping of a task (see ptrace(2) man page). */ enum EmulatedStopType { NOT_STOPPED, GROUP_STOP, // stopped by a signal. This applies to non-ptracees too. SIGNAL_DELIVERY_STOP // Stopped before delivering a signal. ptracees only. }; /** * A "task" is a task in the linux usage: the unit of scheduling. (OS * people sometimes call this a "thread control block".) 
Multiple * tasks may share the same address space and file descriptors, in * which case they're commonly called "threads". Or two tasks may * have their own address spaces and file descriptors, in which case * they're called "processes". Both look the same to rr (on linux), * so no distinction is made here. */ class Task { friend class Session; friend class RecordSession; friend class ReplaySession; public: typedef std::vector DebugRegs; ~Task(); /** * Return true iff this is at an execution state where * resuming execution may lead to the restart of an * interrupted syscall. * * For example, if a signal without a user handler is about to * be delivered to this just after a syscall interruption, * then delivering the signal may restart the first syscall * and this method will return true. */ bool at_may_restart_syscall() const; /** * This must be in an emulated syscall, entered through * |cont_sysemu()| or |cont_sysemu_singlestep()|, but that's * not checked. If so, step over the system call instruction * to "exit" the emulated syscall. */ void finish_emulated_syscall(); /** * Shortcut to the most recent |pending_event->desched.rec| when * there's a desched event on the stack, and nullptr otherwise. * Exists just so that clients don't need to dig around in the * event stack to find this record. */ const struct syscallbuf_record* desched_rec() const; /** * Returns true when the task is in a signal handler in an interrupted * system call being handled by syscall buffering. */ bool running_inside_desched() const; size_t syscallbuf_data_size() const { return syscallbuf_hdr->num_rec_bytes + sizeof(*syscallbuf_hdr); } /** * Mark the members of this task's group as "unstable", * meaning that even though a task may look runnable, it * actually might not be. (And so |waitpid(-1)| should be * used to schedule the next task.) * * This is needed to handle the peculiarities of mass Task * death at exit_group() and upon receiving core-dumping * signals. The reason it's needed is easier to understand if * you keep in mind that the "main loop" of ptrace tracers is * /supposed/ to look like * * while (true) { * int tid = waitpid(-1, ...); * // do something with tid * ptrace(tid, PTRACE_SYSCALL, ...); * } * * That is, the tracer is supposed to let the kernel schedule * threads and then respond to notifications generated by the * kernel. * * Obviously this isn't how rr's recorder loop looks, because, * among other things, rr has to serialize thread execution. * Normally this isn't much of a problem. However, mass task * death is an exception. What happens at a mass task death * is a sequence of events like the following * * 1. A task calls exit_group() or is sent a core-dumping * signal. * 2. rr receives a PTRACE_EVENT_EXIT notification for the * task. * 3. rr detaches from the dying/dead task. * 4. Successive calls to waitpid(-1) generate additional * PTRACE_EVENT_EXIT notifications for each also-dead task * in the original task's thread group. Repeat (2) / (3) * for each notified task. * * So why destabilization? After (2), rr can't block on the * task shutting down (|waitpid(tid)|), because the kernel * harvests the LWPs of the dying task group in an unknown * order (which we shouldn't assume, even if we could guess * it). If rr blocks on the task harvest, it will (usually) * deadlock. * * And because rr doesn't know the order of tasks that will be * reaped, it doesn't know which of the dying tasks to * "schedule". 
If it guesses and blocks on another task in * the group's status-change, it will (usually) deadlock. * * So destabilizing a task group, from rr's perspective, means * handing scheduling control back to the kernel and not * trying to harvest tasks before detaching from them. * * NB: an invariant of rr scheduling is that all process * status changes happen as a result of rr resuming the * execution of a task. This is required to keep tracees in * known states, preventing events from happening "behind rr's * back". However, destabilizing a task group means that * these kinds of changes are possible, in theory. * * Currently, instability is a one-way street; it's only used * needed for death signals and exit_group(). */ void destabilize_task_group(); /** * Emulate 'tracer' ptracing this task. */ void set_emulated_ptracer(Task* tracer); /** * Call this when an event occurs that should stop a ptraced task. * If we're emulating ptrace of the task, stop the task and wake the ptracer * if it's waiting, and queue "code" as an status code to be reported to the * ptracer. * Returns true if the task is stopped-for-emulated-ptrace, false otherwise. */ bool emulate_ptrace_stop(int code, EmulatedStopType stop_type); /** * Force the ptrace-stop state no matter what state the task is currently in. */ void force_emulate_ptrace_stop(int code, EmulatedStopType stop_type); /** * Called when this task is able to receive a SIGCHLD (e.g. because * we completed delivery of a signal already). Sends a new synthetic * SIGCHLD to the task if there are still ptraced tasks that need a SIGCHLD * sent for them. */ void send_synthetic_SIGCHLD_if_necessary(); /** * Called when we're about to deliver a signal to this task. If it's a * synthetic SIGCHLD and there's a ptraced task that needs to SIGCHLD, * update the siginfo to reflect the status and note that that * ptraced task has had its SIGCHLD sent. */ void set_siginfo_for_synthetic_SIGCHLD(siginfo_t* si); /** * Returns true if this task is in a waitpid or similar that would return * when t's status changes due to a ptrace event. */ bool is_waiting_for_ptrace(Task* t); /** * Returns true if this task is in a waitpid or similar that would return * when t's status changes due to a regular event (exit). */ bool is_waiting_for(Task* t); /** * Dump attributes of this process, including pending events, * to |out|, which defaults to LOG_FILE. */ void dump(FILE* out = nullptr) const; /** * Called after the first exec in a session, when the session first * enters a consistent state. Prior to that, the task state * can vary based on how rr set up the child process. We have to flush * out any state that might have been affected by that. */ void flush_inconsistent_state(); /** * Return total number of ticks ever executed by this task. * Updates tick count from the current performance counter values if * necessary. */ Ticks tick_count() { return ticks; } /** * Set tick count to 'count'. */ void set_tick_count(Ticks count); /** * Return true if this exited because of a SYS_exit/exit_group * call. */ bool exited() const { return WIFEXITED(wait_status); } /** Return the event at the top of this's stack. */ Event& ev() { return pending_events.back(); } const Event& ev() const { return pending_events.back(); } /** * Stat |fd| in the context of this task's fd table. */ struct stat stat_fd(int fd); /** * Open |fd| in the context of this task's fd table. */ ScopedFd open_fd(int fd, int flags); /** * Get the name of the file referenced by |fd| in the context of this * task's fd table. 
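 *
 * Usage sketch (editor's illustration; the returned path is made up):
 *
 *   std::string name = t->file_name_of_fd(2);  // e.g. "/dev/pts/4"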
*/ std::string file_name_of_fd(int fd); /** * Force the wait status of this to |status|, as if * |wait()/try_wait()| had returned it. Call this whenever a waitpid * returned activity for this past. * If override_siginfo is non-null and status indicates a pending signal, * use *override_siginfo as the siginfo instead of reading it from the kernel. */ void did_waitpid(int status, siginfo_t* override_siginfo = nullptr); /** * Syscalls have side effects on registers (e.g. setting the flags register). * Perform those side effects on |regs| and do set_regs() on that to make it * look like a syscall happened. */ void emulate_syscall_entry(const Registers& regs); /** * Wait for |futex| in this address space to have the value * |val|. * * WARNING: this implementation semi-busy-waits for the value * change. This must only be used in contexts where the futex * will change "soon". */ void futex_wait(remote_ptr futex, int val); /** * Return the ptrace message pid associated with the current ptrace * event, f.e. the new child's pid at PTRACE_EVENT_CLONE. */ pid_t get_ptrace_eventmsg_pid(); uint16_t get_ptrace_eventmsg_seccomp_data(); /** * Return the siginfo at the signal-stop of this. * Not meaningful unless this is actually at a signal stop. */ const siginfo_t& get_siginfo(); /** * Set the siginfo for the signal-stop of this. */ void set_siginfo(const siginfo_t& si); /** * Return the trace we're either recording to (|trace_reader()|) * or replaying from (|trace_writer()|). */ TraceReader& trace_reader(); TraceWriter& trace_writer(); /** * Initialize tracee buffers in this, i.e., implement * RRCALL_init_syscall_buffer. This task must be at the point * of *exit from* the rrcall. Registers will be updated with * the return value from the rrcall, which is also returned * from this call. |map_hint| suggests where to map the * region; see |init_syscallbuf_buffer()|. * * Pass SHARE_DESCHED_EVENT_FD to additionally share that fd. */ void init_buffers(remote_ptr map_hint); /** * Destroy in the tracee task the scratch buffer and syscallbuf (if * syscallbuf_child is non-null). * This task must already be at a state in which remote syscalls can be * executed; if it's not, results are undefined. */ void destroy_buffers(); /** Return the current $ip of this. */ remote_code_ptr ip() { return regs().ip(); } /** * Return true if this is at an arm-desched-event syscall. */ bool is_arm_desched_event_syscall(); /** * Return true if this is at an arm-desched-event or * disarm-desched-event syscall. */ bool is_desched_event_syscall(); /** * Return true if this is at a disarm-desched-event syscall. */ bool is_disarm_desched_event_syscall(); /** * Return true when this is just before a syscall trap * instruction for a traced syscall made by the syscallbuf * code. Callers may assume |is_in_syscallbuf()| is implied * by this. */ bool is_entering_traced_syscall() { return ip() == as->traced_syscall_ip() || ip() == as->privileged_traced_syscall_ip(); } /** * Return true if this is within the syscallbuf library. This * *does not* imply that $ip is at a buffered syscall; see * below. */ bool is_in_syscallbuf() { remote_ptr p = ip().to_data_ptr(); return (as->syscallbuf_lib_start() <= p && p < as->syscallbuf_lib_end()) || (as->rr_page_start() <= p && p < as->rr_page_end()); } /** * Return true when this task is in a traced syscall made by the * syscallbuf code. Callers may assume |is_in_syscallbuf()| * is implied by this. Note that once we've entered the traced syscall, * ip() is immediately after the syscall instruction. 
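 *
 * For example (editor's note): on x86 and x86-64 the syscall instruction
 * is two bytes long, so at this point
 *   ip() == as->traced_syscall_ip() + 2,
 * which is what increment_by_syscall_insn_length(arch()) accounts for.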
*/ bool is_in_traced_syscall() { return ip() == as->traced_syscall_ip().increment_by_syscall_insn_length( arch()) || ip() == as->privileged_traced_syscall_ip() .increment_by_syscall_insn_length(arch()); } /** * Return true when this task is in an untraced syscall, i.e. one * initiated by a function in the syscallbuf. Callers may * assume |is_in_syscallbuf()| is implied by this. Note that once we've * entered the traced syscall, ip() is immediately after the syscall * instruction. */ bool is_in_untraced_syscall() { return ip() == AddressSpace::rr_page_ip_in_untraced_syscall() || ip() == AddressSpace::rr_page_ip_in_untraced_replayed_syscall() || ip() == AddressSpace::rr_page_ip_in_privileged_untraced_syscall(); } /** * Return true if |ptrace_event()| is the trace event * generated by the syscallbuf seccomp-bpf when a traced * syscall is entered. */ bool is_ptrace_seccomp_event() const; /** Return true iff |sig| is blocked for this. */ bool is_sig_blocked(int sig) const; /** Set |sig| to be treated as blocked. */ void set_sig_blocked(int sig); /** * Return true iff |sig| is SIG_IGN, or it's SIG_DFL and the * default disposition is "ignore". */ bool is_sig_ignored(int sig) const; /** * Return true if the current state of this looks like the * interrupted syscall at the top of our event stack, if there * is one. */ bool is_syscall_restart(); /** Dump all pending events to the INFO log. */ void log_pending_events() const; /** * Return nonzero if |t| may not be immediately runnable, * i.e., resuming execution and then |waitpid()|'ing may block * for an unbounded amount of time. When the task is in this * state, the tracer must await a |waitpid()| notification * that the task is no longer possibly-blocked before resuming * its execution. */ bool may_be_blocked() const; /** * Call this hook just before exiting a syscall. Often Task * attributes need to be updated based on the finishing syscall. * Use 'regs' instead of this->regs() because some registers may not be * set properly in the task yet. */ void on_syscall_exit(int syscallno, const Registers& regs); /** * Assuming ip() is just past a breakpoint instruction, adjust * ip() backwards to point at that breakpoint insn. */ void move_ip_before_breakpoint(); /** * Assuming we've just entered a syscall, exit that syscall and reset * state to reenter the syscall just as it was called the first time. */ void exit_syscall_and_prepare_restart(); /** * Resume execution until we get a syscall entry or exit event. * During recording, any signals received are stashed. * seccomp events are ignored; we assume this syscall is under rr's control. */ void advance_syscall(); /** * Return the "task name"; i.e. what |prctl(PR_GET_NAME)| or * /proc/tid/comm would say that the task's name is. */ const std::string& name() const { return prname; } /** * Call this method when this task has just performed an |execve()| * (so we're in the new address space), but before the system call has * returned. * During replay replay_regs is non-null and contains the register values * recorded immediately after the exec. */ void post_exec(const Registers* replay_regs = nullptr, const ExtraRegisters* replay_extra_regs = nullptr, const std::string* replay_exe = nullptr); /** * Call this method when this task has exited a successful execve() syscall. * At this point it is safe to make remote syscalls. * |event| is the TraceTaskEvent (EXEC) that will be recorded or is being * replayed. */ void post_exec_syscall(TraceTaskEvent& event); /** * Manage pending events. 
|push_event()| pushes the given * event onto the top of the event stack. The |pop_*()| * helpers pop the event at top of the stack, which must be of * the specified type. */ void push_event(const Event& ev) { pending_events.push_back(ev); } void pop_event(EventType expected_type); void pop_noop() { pop_event(EV_NOOP); } void pop_desched() { pop_event(EV_DESCHED); } void pop_signal_delivery() { pop_event(EV_SIGNAL_DELIVERY); } void pop_signal_handler() { pop_event(EV_SIGNAL_HANDLER); } void pop_syscall() { pop_event(EV_SYSCALL); } void pop_syscall_interruption() { pop_event(EV_SYSCALL_INTERRUPTION); } /** * Read |N| bytes from |child_addr| into |buf|, or don't * return. */ template void read_bytes(remote_ptr child_addr, uint8_t(&buf)[N]) { return read_bytes_helper(child_addr, N, buf); } /** * Record an event on behalf of this. Record the registers of * this (and other relevant execution state) so that it can be * used or verified during replay, if that state is available * and meaningful at this's current execution point. * |record_current_event()| record |this->ev()|, and * |record_event()| records the specified event. */ void record_current_event(); enum FlushSyscallbuf { FLUSH_SYSCALLBUF, /* Pass this if it's safe to replay the event before we process the * syscallbuf records. */ DONT_FLUSH_SYSCALLBUF }; void record_event(const Event& ev, FlushSyscallbuf flush = FLUSH_SYSCALLBUF); /** * Save tracee data to the trace. |addr| is the address in * the address space of this task. The |record_local*()| * variants record data that's already been read from this, * and the |record_remote*()| variants read the data and then * record it. * If 'addr' is null then no record is written. */ void record_local(remote_ptr addr, ssize_t num_bytes, const void* buf); template void record_local(remote_ptr addr, const T* buf) { record_local(addr, sizeof(T), buf); } void record_remote(remote_ptr addr, ssize_t num_bytes); template void record_remote(remote_ptr addr) { record_remote(addr, sizeof(T)); } // Record as much as we can of the bytes in this range. void record_remote_fallible(remote_ptr addr, ssize_t num_bytes); /** * Save tracee data to the trace. |addr| is the address in * the address space of this task. * If 'addr' is null then a zero-length record is written. */ void record_remote_even_if_null(remote_ptr addr, ssize_t num_bytes); template void record_remote_even_if_null(remote_ptr addr) { record_remote_even_if_null(addr, sizeof(T)); } void record_remote_str(remote_ptr str); /** Return the current regs of this. */ const Registers& regs() const; /** Return the extra registers of this. */ const ExtraRegisters& extra_regs(); /** Return the current arch of this. This can change due to exec(). */ SupportedArch arch() const { // Use 'registers' directly instead of calling regs(), since this can // be called while the task is not stopped. return registers.arch(); } enum { /* The x86 linux 3.5.0-36 kernel packaged with Ubuntu * 12.04 has been observed to mutate $esi across * syscall entry/exit. (This has been verified * outside of rr as well; not an rr bug.) It's not * clear whether this is a ptrace bug or a kernel bug, * but either way it's not supposed to happen. So we * allow validate_args to cover up that bug. */ IGNORE_ESI = 0x01 }; /** Assert that the current register values match the values in the * current trace record. */ void validate_regs(uint32_t flags = 0); /** * Capture return addresses from this task's stack. 
The returned
   * address list may not be actual return addresses (in optimized code,
   * will probably not be), but they will be a function of the task's current
   * state, so may be useful for distinguishing this state from other states.
   */
  ReturnAddressList return_addresses();

  /**
   * Return the debug status, which is a bitfield comprising
   * |DebugStatus| bits (see above).
   */
  uintptr_t debug_status();
  /**
   * Return the debug status, which is a bitfield comprising
   * |DebugStatus| bits (see above), and clear the kernel state.
   */
  uintptr_t consume_debug_status();
  void replace_debug_status(uintptr_t status);

  /**
   * Return the address of the watchpoint programmed at slot |i|.
   */
  remote_ptr<void> watchpoint_addr(size_t i);

  /** Return the current $sp of this. */
  remote_ptr<void> sp() { return regs().sp(); }

  /**
   * Read |val| from |child_addr|.
   * If the data can't all be read, then if |ok| is non-null
   * sets *ok to false, otherwise asserts.
   */
  template <typename T>
  T read_mem(remote_ptr<T> child_addr, bool* ok = nullptr) {
    T val;
    read_bytes_helper(child_addr, sizeof(val), &val, ok);
    return val;
  }

  /**
   * Read |count| values from |child_addr|.
   */
  template <typename T>
  std::vector<T> read_mem(remote_ptr<T> child_addr, size_t count,
                          bool* ok = nullptr) {
    std::vector<T> v;
    v.resize(count);
    read_bytes_helper(child_addr, sizeof(T) * count, v.data(), ok);
    return v;
  }

  /**
   * Read and return the C string located at |child_addr| in
   * this address space.
   */
  std::string read_c_str(remote_ptr<char> child_addr);

  /**
   * Copy |num_bytes| from |src| to |dst| in the address space
   * of this.
   */
  void remote_memcpy(remote_ptr<void> dst, remote_ptr<void> src,
                     size_t num_bytes);
  template <typename T>
  void remote_memcpy(remote_ptr<T> dst, remote_ptr<T> src) {
    remote_memcpy(dst, src, sizeof(T));
  }

  /**
   * Resume execution |how|, delivering |sig| if nonzero.
   * After resuming, |wait_how|. In replay, reset hpcs and
   * request a tick period of tick_period. The default value
   * of tick_period is 0, which means effectively infinite.
   *
   * You probably want to use one of the cont*() helpers above,
   * and not this.
   */
  void resume_execution(ResumeRequest how, WaitRequest wait_how,
                        TicksRequest tick_period, int sig = 0);

  /** Return the session this is part of. */
  Session& session() const { return *session_; }
  RecordSession& record_session() const;
  ReplaySession& replay_session() const;

  const TraceFrame& current_trace_frame();

  /** Restore the next chunk of saved data from the trace to this. */
  ssize_t set_data_from_trace();

  /** Restore all remaining chunks of saved data for the current trace frame. */
  void apply_all_data_records_from_trace();

  /**
   * Set the syscall-return-value register of this to what was
   * saved in the current trace frame.
   */
  void set_return_value_from_trace();

  /** Set the tracee's registers to |regs|. */
  void set_regs(const Registers& regs);

  /** Set the tracee's extra registers to |regs|. */
  void set_extra_regs(const ExtraRegisters& regs);

  /**
   * Program the debug registers to the vector of watchpoint
   * configurations in |regs| (also updating the debug control
   * register appropriately). Return true if all registers were
   * successfully programmed, false otherwise. Any time false
   * is returned, the caller is guaranteed that no watchpoint
   * has been enabled; either all of |regs| is enabled and true
   * is returned, or none are and false is returned.
   */
  bool set_debug_regs(const DebugRegs& regs);

  /**
   * Reads the value of the given debug register.
   */
  uintptr_t get_debug_reg(size_t regno);

  /**
   * Update the futex robust list head pointer to |list| (which
   * is of size |len|).
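   *
   * Typically called from the set_robust_list(2) exit path, roughly
   * (editor's sketch; |r| holds the syscall-exit registers):
   *
   *   t->set_robust_list(r.arg1(), r.arg2());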
   */
  void set_robust_list(remote_ptr<void> list, size_t len) {
    robust_futex_list = list;
    robust_futex_list_len = len;
  }
  remote_ptr<void> robust_list() const { return robust_futex_list; }
  size_t robust_list_len() const { return robust_futex_list_len; }

  /** Update the thread area to |tls|. */
  void set_thread_area(remote_ptr<struct user_desc> tls);

  const std::vector<struct user_desc>& thread_areas() { return thread_areas_; }

  /** Update the clear-tid futex to |tid_addr|. */
  void set_tid_addr(remote_ptr<int> tid_addr);
  remote_ptr<int> tid_addr() { return tid_futex; }

  /**
   * Call this after |sig| is delivered to this task. Emulate
   * sighandler updates induced by the signal delivery.
   */
  void signal_delivered(int sig);

  /** Return true if this died because of a signal. */
  bool signaled() const { return WIFSIGNALED(wait_status); }

  /**
   * Return true if the disposition of |sig| in |table| isn't
   * SIG_IGN or SIG_DFL, that is, if a user sighandler will be
   * invoked when |sig| is received.
   */
  bool signal_has_user_handler(int sig) const;

  /**
   * If signal_has_user_handler(sig) is true, return the address of the
   * user handler, otherwise return null.
   */
  remote_code_ptr get_signal_user_handler(int sig) const;

  /**
   * Return true if the signal handler for |sig| takes a siginfo_t*
   * parameter.
   */
  bool signal_handler_takes_siginfo(int sig) const;

  /**
   * Return |sig|'s current sigaction. Returned as raw bytes since the
   * data is architecture-dependent.
   */
  const std::vector<uint8_t>& signal_action(int sig) const;

  /**
   * Stashed-signal API: if a signal becomes pending at an
   * awkward time, but could be handled "soon", call
   * |stash_sig()| to stash the current pending-signal state.
   *
   * |has_stashed_sig()| obviously returns true if |stash_sig()|
   * has been called successfully.
   *
   * |pop_stash_sig()| restores the (relevant) state of this
   * Task to what was saved in |stash_sig()|, and returns the
   * saved siginfo. After this call, |has_stashed_sig()| is
   * false.
   *
   * NB: |get_siginfo()| will always return the "real" siginfo,
   * regardless of stash popped-ness state. Callers must ensure
   * they do the right thing with the popped siginfo.
   *
   * If the process unexpectedly died (due to SIGKILL), we don't
   * stash anything.
   */
  void stash_sig();
  void stash_synthetic_sig(const siginfo_t& si);
  bool has_stashed_sig() const { return !stashed_signals.empty(); }
  siginfo_t peek_stash_sig();
  void pop_stash_sig();

  /**
   * When a signal triggers an emulated ptrace-stop for this task,
   * save the siginfo so a later emulated ptrace-continue with this signal
   * number can use it.
   */
  void save_ptrace_signal_siginfo(const siginfo_t& si);

  /**
   * When emulating a ptrace-continue with a signal number, extract the siginfo
   * that was saved by |save_ptrace_signal_siginfo|. If no such siginfo was
   * saved, make one up.
   */
  siginfo_t take_ptrace_signal_siginfo(int sig);

  /**
   * Return true when the task is running, false if it's stopped.
   */
  bool is_running() const { return !is_stopped; }

  /**
   * Return the status of this as of the last successful
   * wait()/try_wait() call.
   */
  int status() const { return wait_status; }

  /**
   * Return true if this is at a signal-stop. If so,
   * |stop_sig()| returns the signal that stopped us.
   */
  bool stopped() const { return stopped_from_status(wait_status); }
  int stop_sig() const { return stop_sig_from_status(wait_status); }

  /**
   * Return the ptrace event as of the last call to
   * |wait()/try_wait()|.
   */
  int ptrace_event() const { return ptrace_event_from_status(wait_status); }

  /**
   * Return the signal that's pending for this as of the last
   * call to |wait()/try_wait()|. The signal 0 means "no
   * signals".
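   *
   * Worked example (editor's note): a wait status of 0x57f is
   * WIFSTOPPED with WSTOPSIG == 5, so pending_sig() reports SIGTRAP;
   * a status of 0 yields 0, i.e. no pending signal.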
*/ int pending_sig() const { return pending_sig_from_status(wait_status); } void clear_wait_status() { wait_status = 0; } /** Return the task group this belongs to. */ TaskGroup::shr_ptr task_group() { return tg; } /** Return the id of this task's recorded thread group. */ pid_t tgid() const { return tg->tgid; } /** Return id of real OS task group. */ pid_t real_tgid() const { return tg->real_tgid; } TaskUid tuid() const { return TaskUid(rec_tid, serial); } /** Return the dir of the trace we're using. */ const std::string& trace_dir() const; /** * Get the current "time" measured as ticks on recording trace * events. |task_time()| returns that "time" wrt this task * only. */ uint32_t trace_time() const; /** * Call this after the tracee successfully makes a * |prctl(PR_SET_NAME)| call to change the task name to the * string pointed at in the tracee's address space by * |child_addr|. */ void update_prname(remote_ptr child_addr); /** * Call this when SYS_sigaction is finishing with |regs|. */ void update_sigaction(const Registers& regs); /** * Call this when the tracee is about to complete a * SYS_rt_sigprocmask syscall with |regs|. */ void update_sigmask(const Registers& regs); /** * Call this before recording events or data. Records * syscallbuf data and flushes the buffer, if there's buffered * data. * * The timing of calls to this is tricky. We must flush the syscallbuf * before recording any data associated with events that happened after the * buffered syscalls. But we don't support flushing a syscallbuf twice with * no intervening reset, i.e. after flushing we have to be sure we'll get * a chance to reset the syscallbuf (i.e. record some other kind of event) * before the tracee runs again in a way that might append another buffered * syscall --- so we can't flush too early */ void maybe_flush_syscallbuf(); /** * Call this after recording an event when it might be safe to reset the * syscallbuf. It must be after recording an event to ensure during replay * we run past any syscallbuf after-syscall code that uses the buffer data. */ void maybe_reset_syscallbuf(); /** * Call this to reset syscallbuf_hdr->num_rec_bytes and zero out the data * recorded in the syscall buffer. This makes for more deterministic behavior * especially during replay, where during checkpointing we only save and * restore the recorded data area. */ void reset_syscallbuf(); /** * Return the virtual memory mapping (address space) of this * task. */ AddressSpace::shr_ptr vm() { return as; } FdTable::shr_ptr fd_table() { return fds; } enum AllowInterrupt { ALLOW_INTERRUPT, // Pass this when the caller has already triggered a ptrace stop // and wait() must not trigger a new one. DONT_ALLOW_INTERRUPT }; /** * Block until the status of this changes. wait() expects the wait to end * with the process in a stopped() state. */ void wait(AllowInterrupt allow_interrupt = ALLOW_INTERRUPT); /** * Return true if the status of this has changed, but don't * block. */ bool try_wait(); /** * Currently we don't allow recording across uid changes, so we can just * return rr's uid. */ uid_t getuid() { return ::getuid(); } /** * Write |N| bytes from |buf| to |child_addr|, or don't return. */ template void write_bytes(remote_ptr child_addr, const uint8_t(&buf)[N]) { write_bytes_helper(child_addr, N, buf); } /** * Write |val| to |child_addr|. 
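   *
   * Usage sketch (editor's illustration, mirroring a call that appears in
   * Task::clone() in task.cc):
   *
   *   t->write_mem(REMOTE_PTR_FIELD(syscallbuf_child, locked), uint8_t(1));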
*/ template <typename T> void write_mem(remote_ptr<T> child_addr, const T& val, bool* ok = nullptr) { assert(type_has_no_holes<T>()); write_bytes_helper(child_addr, sizeof(val), static_cast<const void*>(&val), ok); } /** * This is not the helper you're looking for. See above: you * probably accidentally wrote |write_mem(addr, &foo)| when * you meant |write_mem(addr, foo)|. */ template <typename T> void write_mem(remote_ptr<T> child_addr, const T* val) = delete; template <typename T> void write_mem(remote_ptr<T> child_addr, const T* val, int count) { assert(type_has_no_holes<T>()); write_bytes_helper(child_addr, sizeof(*val) * count, static_cast<const void*>(val)); } /** * Don't use these helpers directly; use the safer and more * convenient variants above. * * Read/write the number of bytes that the template wrapper * inferred. */ ssize_t read_bytes_fallible(remote_ptr<void> addr, ssize_t buf_size, void* buf); /** * If the data can't all be read, then if |ok| is non-null, sets *ok to * false, otherwise asserts. */ void read_bytes_helper(remote_ptr<void> addr, ssize_t buf_size, void* buf, bool* ok = nullptr); void write_bytes_helper(remote_ptr<void> addr, ssize_t buf_size, const void* buf, bool* ok = nullptr); /** See |pending_sig()| above. */ int pending_sig_from_status(int status) const; /** See |ptrace_event()| above. */ static int ptrace_event_from_status(int status) { return (0xFF0000 & status) >> 16; } /** See |stopped()| and |stop_sig()| above. */ static bool stopped_from_status(int status) { return WIFSTOPPED(status); } int stop_sig_from_status(int status) const; /** * Call this when performing a clone syscall in this task. Returns * true if the call completed, false if it was interrupted and * needs to be resumed. When the call returns true, the task is * stopped at a PTRACE_EVENT_CLONE or PTRACE_EVENT_FORK. */ bool clone_syscall_is_complete(); /** * Return the pid of the newborn thread created by this task. * Called when this task has a PTRACE_EVENT_CLONE with CLONE_THREAD. */ pid_t find_newborn_thread(); /** * Return the pid of the newborn process created by this task. * Called when this task has a PTRACE_EVENT_CLONE without CLONE_THREAD, * or PTRACE_EVENT_FORK. */ pid_t find_newborn_child_process(); /** * Called when SYS_rrcall_init_preload has happened. */ void at_preload_init(); /** * Open /proc/[tid]/mem fd for our AddressSpace, closing the old one * first. * This never fails. If necessary we force the tracee to open the file * itself and smuggle the fd back to us. */ void open_mem_fd(); /** * Calls open_mem_fd if this task's AddressSpace doesn't already have one. */ void open_mem_fd_if_needed(); /** * Do a tgkill to send a specific signal to this task. */ void tgkill(int sig); /** * Return the name of the given syscall. */ std::string syscall_name(int syscallno) const; /* State only used during recording. */ /* True when this is switchable for semantic purposes, but * definitely isn't blocked on any resource. In that case, * it's safe for the scheduler to do a blocking waitpid on * this if our scheduling slot is open. */ bool pseudo_blocked; /* Number of times this context has been scheduled in a row, * which approximately corresponds to the number of events * it's processed in succession. The scheduler maintains this * state and uses it to make scheduling decisions. */ uint32_t succ_event_counter; /* True when any assumptions made about the status of this * process have been invalidated, and must be re-established * with a waitpid() call. Only applies to tasks which are dying, usually * due to a signal sent to the entire task group. 
*/ bool unstable; /* exit(), or exit_group() with one task, has been called, so * the exit can be treated as stable. */ bool stable_exit; /* Task 'nice' value set by setpriority(2). We use this to drive scheduling decisions. rr's scheduler is deliberately simple and unfair; a task never runs as long as there's another runnable task with a lower nice value. */ int priority; /* Tasks with in_round_robin_queue set are in the session's * in_round_robin_queue instead of its task_priority_set. */ bool in_round_robin_queue; // The set of signals that were blocked during a sigsuspend. Only present // during the first EV_SIGNAL during an interrupted sigsuspend. std::unique_ptr<sig_set_t> sigsuspend_blocked_sigs; // If not NOT_STOPPED, then the task is logically stopped and this is the type // of stop. EmulatedStopType emulated_stop_type; // Task which is emulating ptrace of this task, or null Task* emulated_ptracer; // true if this task needs to send a SIGCHLD to its ptracer for its // emulated ptrace stop bool emulated_ptrace_SIGCHLD_pending; // if nonzero, code to deliver to ptracer when it waits int emulated_ptrace_stop_code; std::set<Task*> emulated_ptrace_tracees; WaitType in_wait_type; pid_t in_wait_pid; /* Imagine that task A passes buffer |b| to the read() * syscall. Imagine that, after A is switched out for task B, * task B then writes to |b|. Then B is switched out for A. * Since rr doesn't schedule the kernel code, the result is * nondeterministic. To avoid that class of replay * divergence, we "redirect" (in)outparams passed to may-block * syscalls, to "scratch memory". The kernel writes to * scratch deterministically, and when A (in the example * above) exits its read() syscall, rr copies the scratch data * back to the original buffers, serializing A and B in the * example above. * * Syscalls can "nest" due to signal handlers. If a syscall A * is interrupted by a signal, and the sighandler calls B, * then we can have scratch buffers set up for args of both A * and B. In linux, B won't actually re-enter A; A is exited * with a "will-restart" error code and its args are saved for * when (or if) it's restarted after the signal. But that * doesn't really matter wrt scratch space. (TODO: in the * future, we may be able to use that fact to simplify * things.) * * Because of nesting, at first blush it seems we should push * scratch allocations onto a stack and pop them as syscalls * (or restarts thereof) complete. But under a critical * assumption, we can actually skip that. The critical * assumption is that the kernel writes its (in)outparams * atomically wrt signal interruptions, and only writes them * on successful exit. Each syscall will complete in stack * order, and it's invariant that the syscall processors must * write back to user buffers *only* the data that was * written by the kernel. So as long as the atomicity * assumption holds, the completion of syscalls higher in the * event stack may overwrite scratch space, but the completion * of each syscall will overwrite those overwrites again, and * that over-overwritten data is exactly and only what we'll * write back to the tracee. * * |scratch_ptr| points at the mapped address in the child, * and |size| is the total available space. */ remote_ptr<void> scratch_ptr; ssize_t scratch_size; /* Nonzero after the trace recorder has flushed the * syscallbuf. When this happens, the recorder must prepare a * "reset" of the buffer, to zero the record count, at the * next available slot (taking |desched| into * consideration). 
*/ bool flushed_syscallbuf; /* Value of hdr->num_rec_bytes when the buffer was flushed */ uint32_t flushed_num_rec_bytes; /* This bit is set when code wants to prevent the syscall * record buffer from being reset when it normally would be. * Currently, the desched'd syscall code uses this. */ bool delay_syscallbuf_reset; /* The child's desched counter event fd number, and our local * dup. */ ScopedFd desched_fd; int desched_fd_child; /* True when the tracee has started using the syscallbuf, and * the tracer will start receiving PTRACE_SECCOMP events for * traced syscalls. We don't make any attempt to guess at the * OS's process/thread semantics; this flag goes on the first * time rr sees a PTRACE_SECCOMP event from the task. * * NB: there must always be at least one traced syscall before * any untraced ones; that's the magic "rrcall" the tracee * uses to initialize its syscallbuf. */ bool seccomp_bpf_enabled; // Value to return from PR_GET_SECCOMP uint8_t prctl_seccomp_status; /* State used during both recording and replay. */ PerfCounters hpc; /* This is always the "real" tid of the tracee. */ pid_t tid; /* This is always the recorded tid of the tracee. During * recording, it's synonymous with |tid|, and during replay * it's the tid that was recorded. */ pid_t rec_tid; /* This is the recorded tid of the tracee *in its own pid namespace*. * Only valid during recording, otherwise 0! */ pid_t own_namespace_rec_tid; /* Points at rr's mapping of the (shared) syscall buffer. */ struct syscallbuf_hdr* syscallbuf_hdr; size_t num_syscallbuf_bytes; /* Points at the tracee's mapping of the buffer. */ remote_ptr<struct syscallbuf_hdr> syscallbuf_child; remote_ptr<char> syscallbuf_fds_disabled_child; remote_code_ptr stopping_breakpoint_table; int stopping_breakpoint_table_entry_size; PropertyTable& properties() { return properties_; } struct CapturedState { pid_t rec_tid; uint32_t serial; Registers regs; ExtraRegisters extra_regs; std::string prname; remote_ptr<void> robust_futex_list; size_t robust_futex_list_len; std::vector<struct user_desc> thread_areas; size_t num_syscallbuf_bytes; int desched_fd_child; remote_ptr<struct syscallbuf_hdr> syscallbuf_child; std::vector<uint8_t> syscallbuf_hdr; remote_ptr<char> syscallbuf_fds_disabled_child; remote_ptr<void> scratch_ptr; ssize_t scratch_size; int wait_status; sig_set_t blocked_sigs; std::deque<Event> pending_events; Ticks ticks; remote_ptr<int> tid_futex; remote_ptr<void> top_of_stack; }; private: Task(Session& session, pid_t tid, pid_t rec_tid, uint32_t serial, int priority, SupportedArch a); template <typename Arch> void on_syscall_exit_arch(int syscallno, const Registers& regs); /** Helper function for update_sigaction. */ template <typename Arch> void update_sigaction_arch(const Registers& regs); /** Helper function for init_buffers. */ template <typename Arch> void init_buffers_arch(remote_ptr<void> map_hint); /** * Return a new Task cloned from this one. |flags| are a set of * CloneFlags (see above) that determine which resources are * shared or copied to the new child. |new_tid| is the tid * assigned to the new task by the kernel. |new_rec_tid| is * only relevant to replay, and is the pid that was assigned * to the task during recording. */ Task* clone(int flags, remote_ptr<void> stack, remote_ptr<void> tls, remote_ptr<int> cleartid_addr, pid_t new_tid, pid_t new_rec_tid, uint32_t new_serial, Session* other_session = nullptr); /** * Grab state from this task into a structure that we can use to * initialize a new task via os_clone_into/os_fork_into and copy_state. 
*/ CapturedState capture_state(); /** * Make this task look like an identical copy of the task whose state * was captured by capture_state(), in * every way relevant to replay. This task should have been * created by calling os_clone_into() or os_fork_into(), * and if it wasn't results are undefined. * * Some task state must be copied into this by injecting and * running syscalls in this task. Other state is metadata * that can simply be copied over in local memory. */ void copy_state(const CapturedState& state); /** * Destroy tracer-side state of this (as opposed to remote, * tracee-side state). */ void destroy_local_buffers(); /** * Make the ptrace |request| with |addr| and |data|, return * the ptrace return value. */ long fallible_ptrace(int request, remote_ptr<void> addr, void* data); /** * Like |fallible_ptrace()| but infallible for most purposes. * Errors other than ESRCH are treated as fatal. Returns false if * we got ESRCH. This can happen any time during recording when the * task gets a SIGKILL from outside. */ bool ptrace_if_alive(int request, remote_ptr<void> addr, void* data); /** * Like |fallible_ptrace()| but completely infallible. * All errors are treated as fatal. */ void xptrace(int request, remote_ptr<void> addr, void* data); /** * Read tracee memory using PTRACE_PEEKDATA calls. Slow, only use * as fallback. Returns number of bytes actually read. */ ssize_t read_bytes_ptrace(remote_ptr<void> addr, ssize_t buf_size, void* buf); /** * Write tracee memory using PTRACE_POKEDATA calls. Slow, only use * as fallback. Returns number of bytes actually written. */ ssize_t write_bytes_ptrace(remote_ptr<void> addr, ssize_t buf_size, const void* buf); /** * Try writing 'buf' to 'addr' by replacing pages in the tracee * address-space using a temporary file. This may work around PaX issues. */ bool try_replace_pages(remote_ptr<void> addr, ssize_t buf_size, const void* buf); /** * Map the syscallbuffer for this, shared with this process. * |map_hint| is the address where the syscallbuf is expected * to be mapped --- and this is asserted --- or nullptr if * there are no expectations. * Initializes syscallbuf_child. */ void init_syscall_buffer(AutoRemoteSyscalls& remote, remote_ptr<void> map_hint); /** * True if this has blocked delivery of the desched signal. */ bool is_desched_sig_blocked(); /** * Make the OS-level calls to create a new fork or clone that * will eventually be a copy of this task and return that Task * metadata. These methods are used in concert with * |Task::copy_state()| to create task copies during * checkpointing. * * For |os_fork_into()|, |session| will be tracking the * returned fork child. * * For |os_clone_into()|, |task_leader| is the "main thread" * in the process into which the copy of this task will be * created. |task_leader| will perform the actual OS calls to * create the new child. */ Task* os_fork_into(Session* session); static Task* os_clone_into(const CapturedState& state, Task* task_leader, AutoRemoteSyscalls& remote); /** * Return the TraceStream that we're using, if in recording or replay. * Returns null if we're not in record or replay. */ const TraceStream* trace_stream() const; /** * Make the OS-level calls to clone |parent| into |session| * and return the resulting Task metadata for that new * process. This is as opposed to |Task::clone()|, which only * attaches Task metadata to an /existing/ process. * * The new clone will be tracked in |session|. The other * arguments are as for |Task::clone()| above. 
*/ static Task* os_clone(Task* parent, Session* session, AutoRemoteSyscalls& remote, pid_t rec_child_tid, uint32_t new_serial, unsigned base_flags, remote_ptr<void> stack = nullptr, remote_ptr<int> ptid = nullptr, remote_ptr<void> tls = nullptr, remote_ptr<int> ctid = nullptr); /** Fork and exec a task to run the tracee described by |trace|, with |rec_tid|. */ static Task* spawn(Session& session, const TraceStream& trace, pid_t rec_tid = -1); uint32_t serial; // The address space of this task. AddressSpace::shr_ptr as; // The file descriptor table of this task. FdTable::shr_ptr fds; // The set of signals that are currently blocked. sig_set_t blocked_sigs; // The current stack of events being processed. (We use a // deque instead of a stack because we need to iterate the // events.) std::deque<Event> pending_events; // Task's OS name. std::string prname; // Count of all ticks seen by this task since tracees became // consistent and the task last wait()ed. Ticks ticks; // When |is_stopped|, these are our child registers. Registers registers; // True when we know via waitpid() that the task is stopped and we haven't // resumed it. bool is_stopped; // True when there was a breakpoint set at the location where we resumed // execution bool breakpoint_set_where_execution_resumed; // When |extra_registers_known|, we have saved our extra registers. ExtraRegisters extra_registers; bool extra_registers_known; // Futex list passed to |set_robust_list()|. We could keep a // strong type for this list head and read it if we wanted to, // but for now we only need to remember its address / size at // the time of the most recent set_robust_list() call. remote_ptr<void> robust_futex_list; size_t robust_futex_list_len; // The session we're part of. Session* session_; // Points to the signal-handler table of this task. If this // task is a non-fork clone child, then the table will be // shared with all its "thread" siblings. Any updates made to // that shared table are immediately visible to all sibling // threads. // // fork children always get their own copies of the table. // And if this task exec()s, the table is copied and stripped // of user sighandlers (see below). std::shared_ptr<Sighandlers> sighandlers; // Stashed signal-delivery state, ready to be delivered at // next opportunity. std::deque<siginfo_t> stashed_signals; // Saved emulated-ptrace signals std::vector<siginfo_t> saved_ptrace_siginfos; // The task group this belongs to. std::shared_ptr<TaskGroup> tg; // Entries set by |set_thread_area()| or the |tls| argument to |clone()| // (when that's a user_desc). May be more than one due to different // entry_numbers. std::vector<struct user_desc> thread_areas_; // The memory cell the kernel will clear and notify on exit, // if our clone parent requested it. remote_ptr<int> tid_futex; // The |stack| argument passed to |clone()|, which for // "threads" is the top of the user-allocated stack. remote_ptr<void> top_of_stack; // The most recent status of this task as returned by // waitpid(). int wait_status; // The most recent siginfo (captured when wait_status shows pending_sig()) siginfo_t pending_siginfo; // True when a PTRACE_EXIT_EVENT has been observed in the wait_status // for this task. 
bool seen_ptrace_exit_event; PropertyTable properties_; Task(Task&) = delete; Task operator=(Task&) = delete; }; #endif /* RR_TASK_H_ */ rr-4.1.0/src/test/000077500000000000000000000000001265436462100137165ustar00rootroot00000000000000rr-4.1.0/src/test/64bit_child.c000066400000000000000000000007621265436462100161620ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(void) { /* Fork-and-exec 'echo'. The exec may fail if 'bash' is 64-bit and rr doesn't support 64-bit processes. That's fine; the test should still pass. We're testing that rr doesn't abort. */ FILE* f = popen("echo -n", "r"); while (1) { int ch = fgetc(f); if (ch < 0) { break; } putchar(ch); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/_llseek.c000066400000000000000000000006721265436462100155050ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(__attribute__((unused)) int argc, char* argv[]) { int fd = open(argv[0], O_RDONLY); #ifdef SYS__llseek loff_t result = -1234; #endif test_assert(fd >= 0); #ifdef SYS__llseek test_assert(syscall(SYS__llseek, fd, 0, 0, &result, SEEK_SET) == 0); test_assert(result == 0); #endif atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/abort_nonmain.c000066400000000000000000000007301265436462100167100ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void* kill_thread(__attribute__((unused)) void* dontcare) { atomic_puts("killing ..."); abort(); atomic_puts("FAILED: abort() didn't work"); return NULL; /* not reached */ } int main(void) { pthread_t t; pthread_create(&t, NULL, kill_thread, NULL); pthread_join(t, NULL); atomic_puts("FAILED: joined thread that should have died"); return 0; } rr-4.1.0/src/test/abort_nonmain.run000066400000000000000000000000661265436462100172740ustar00rootroot00000000000000source `dirname $0`/util.sh compare_test 'killing...' rr-4.1.0/src/test/accept.c000066400000000000000000000043151265436462100153240ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void client(const struct sockaddr_un* addr) { int clientfd; struct sockaddr_un a; socklen_t len = sizeof(a); char c; clientfd = socket(AF_UNIX, SOCK_STREAM, 0); test_assert(clientfd >= 0); test_assert(0 == connect(clientfd, (struct sockaddr*)addr, sizeof(*addr))); memset(&a, 0, sizeof(a)); test_assert(1 == recvfrom(clientfd, &c, 1, 0, &a, &len)); atomic_printf("recvfrom() -> %c from (%d,%s) len %d\n", c, a.sun_family, a.sun_path, len); test_assert(c == '!'); test_assert(len > 0); test_assert(addr->sun_family == a.sun_family); test_assert(!strcmp(addr->sun_path, a.sun_path)); exit(0); } static void server(int use_accept4, int pass_addr) { struct sockaddr_un addr; int listenfd; pid_t child; int servefd; struct sockaddr_un peer_addr; socklen_t len = sizeof(peer_addr); int status; struct sockaddr* peer_addr_ptr = pass_addr ? (struct sockaddr*)&peer_addr : NULL; socklen_t* len_ptr = pass_addr ? 
&len : NULL; memset(&addr, 0, sizeof(addr)); addr.sun_family = AF_UNIX; strncpy(addr.sun_path, "socket.unix", sizeof(addr.sun_path) - 1); test_assert(0 <= (listenfd = socket(AF_UNIX, SOCK_STREAM, 0))); test_assert(0 == bind(listenfd, (struct sockaddr*)&addr, sizeof(addr))); test_assert(0 == listen(listenfd, 1)); if (0 == (child = fork())) { client(&addr); test_assert("Not reached" && 0); } if (use_accept4) { test_assert(0 <= (servefd = accept4(listenfd, peer_addr_ptr, len_ptr, 0))); } else { test_assert(0 <= (servefd = accept(listenfd, peer_addr_ptr, len_ptr))); } if (pass_addr) { test_assert(AF_UNIX == peer_addr.sun_family); } test_assert(1 == send(servefd, "!", 1, 0)); test_assert(child == waitpid(child, &status, 0)); test_assert(WIFEXITED(status) && 0 == WEXITSTATUS(status)); unlink(addr.sun_path); close(servefd); close(listenfd); } int main(void) { int use_accept4, pass_addr; for (use_accept4 = 0; use_accept4 <= 1; ++use_accept4) { for (pass_addr = 0; pass_addr <= 1; ++pass_addr) { server(use_accept4, pass_addr); } } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/alarm.c000066400000000000000000000016261265436462100151630ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) {} static volatile int caught_sig = 0; void catcher(__attribute__((unused)) int signum, __attribute__((unused)) siginfo_t* siginfo_ptr, __attribute__((unused)) void* ucontext_ptr) { caught_sig = signum; } int main(void) { struct sigaction sact; int counter; sigemptyset(&sact.sa_mask); sact.sa_flags = SA_SIGINFO; sact.sa_sigaction = catcher; sigaction(SIGALRM, &sact, NULL); alarm(1); /* timer will pop in 1 second */ for (counter = 0; counter >= 0 && !caught_sig; counter++) { if (counter % 100000 == 0) { write(STDOUT_FILENO, ".", 1); } } atomic_printf("\nSignal %d caught, Counter is %d\n", caught_sig, counter); test_assert(SIGALRM == caught_sig); breakpoint(); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/alarm2.c000066400000000000000000000010741265436462100152420ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" void catcher(__attribute__((unused)) int signum, __attribute__((unused)) siginfo_t* siginfo_ptr, __attribute__((unused)) void* ucontext_ptr) { atomic_puts("EXIT-SUCCESS"); exit(0); } int main(void) { struct sigaction sact; int r = 0; sigemptyset(&sact.sa_mask); sact.sa_flags = SA_SIGINFO; sact.sa_sigaction = catcher; sigaction(SIGALRM, &sact, NULL); alarm(1); /* timer will pop in 1 second */ sleep(10); return r; } rr-4.1.0/src/test/alsa_ioctl.c000066400000000000000000000015031265436462100161730ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #ifndef ALSA_DEVICE_DIRECTORY #define ALSA_DEVICE_DIRECTORY "/dev/snd/" #endif int main(void) { int fd = open(ALSA_DEVICE_DIRECTORY "control0", O_NONBLOCK | O_RDONLY); if (fd < 0) { test_assert(errno == EACCES || errno == ENOENT); } else { int* pversion; struct snd_ctl_card_info* info; ALLOCATE_GUARD(pversion, 'x'); *pversion = -1; test_assert(0 == ioctl(fd, SNDRV_CTL_IOCTL_PVERSION, pversion)); VERIFY_GUARD(pversion); test_assert(*pversion >= 0); ALLOCATE_GUARD(info, 1); test_assert(0 == ioctl(fd, SNDRV_CTL_IOCTL_CARD_INFO, info)); VERIFY_GUARD(info); test_assert(info->id[0] > 1); test_assert(info->driver[0] > 1); } atomic_puts("EXIT-SUCCESS"); return 0; } 
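The alsa_ioctl test above relies on ALLOCATE_GUARD/VERIFY_GUARD from rrutil.h, which is not part of this excerpt. As a rough, hypothetical sketch of the pattern those names suggest --- pre-filling an ioctl out-parameter plus surrounding redzone bytes with a known value, then checking afterward that the kernel (and rr's record/replay of it) wrote only inside the object --- consider the following; the helper names, redzone size, and layout here are illustrative assumptions, not rr's actual implementation:

/* Hypothetical guard-byte helpers, similar in spirit to rrutil.h's
 * ALLOCATE_GUARD/VERIFY_GUARD; the real implementation may differ. */
#include <assert.h>
#include <stdlib.h>
#include <string.h>

#define REDZONE 16 /* assumed sentinel size, for illustration only */

static void* allocate_guard(size_t size, char fill) {
  char* p = malloc(size + 2 * REDZONE);
  assert(p);
  /* Fill the object and both redzones with a known byte. */
  memset(p, fill, size + 2 * REDZONE);
  return p + REDZONE; /* caller sees only the object */
}

static void verify_guard(const void* obj, size_t size, char fill) {
  const char* p = (const char*)obj - REDZONE;
  size_t i;
  for (i = 0; i < REDZONE; ++i) {
    /* A syscall (or a divergent replay) that wrote outside the
     * object would clobber one of these sentinel bytes. */
    assert(p[i] == fill);
    assert(p[REDZONE + size + i] == fill);
  }
}

A test would then allocate its ioctl argument with allocate_guard(), issue the ioctl, and call verify_guard() afterward, which is the shape of what alsa_ioctl.c does with the rrutil.h macros.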
rr-4.1.0/src/test/arch_prctl.c000066400000000000000000000004241265436462100162030ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(void) { #ifdef __x86_64__ unsigned long addr; test_assert(0 == syscall(SYS_arch_prctl, ARCH_GET_FS, &addr)); #endif atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/args.c000066400000000000000000000006611265436462100150210ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { test_assert(6 == argc); test_assert(!strcmp("-no", argv[1])); test_assert(!strcmp("--force-syscall-buffer=foo", argv[2])); test_assert(!strcmp("-c", argv[3])); test_assert(!strcmp("1000", argv[4])); test_assert(!strcmp("hello", argv[5])); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/args.run000066400000000000000000000001731265436462100154010ustar00rootroot00000000000000source `dirname $0`/util.sh record args$bitness "-no --force-syscall-buffer=foo -c 1000 hello" replay check 'EXIT-SUCCESS' rr-4.1.0/src/test/async_kill_with_threads.c000066400000000000000000000005321265436462100207570ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void* start_thread(__attribute__((unused)) void* p) { sleep(1000); return NULL; } int main(void) { pthread_t thread; pthread_create(&thread, NULL, start_thread, NULL); atomic_puts("EXIT-SUCCESS"); sleep(1000); return 0; } rr-4.1.0/src/test/async_kill_with_threads.run000066400000000000000000000001611265436462100213370ustar00rootroot00000000000000source `dirname $0`/util.sh # SIGKILL, wait 2.0s record_async_signal 9 2.0 $TESTNAME replay check 'EXIT-SUCCESS' rr-4.1.0/src/test/async_kill_with_threads_main_running.c000066400000000000000000000005351265436462100235260ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void* start_thread(__attribute__((unused)) void* p) { sleep(1000); return NULL; } int main(void) { pthread_t thread; pthread_create(&thread, NULL, start_thread, NULL); atomic_puts("EXIT-SUCCESS"); while (1) { } return 0; } rr-4.1.0/src/test/async_kill_with_threads_main_running.run000066400000000000000000000001611265436462100241030ustar00rootroot00000000000000source `dirname $0`/util.sh # SIGKILL, wait 2.0s record_async_signal 9 2.0 $TESTNAME replay check 'EXIT-SUCCESS' rr-4.1.0/src/test/async_kill_with_threads_thread_running.c000066400000000000000000000005351265436462100240510ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void* start_thread(__attribute__((unused)) void* p) { while (1) { } return NULL; } int main(void) { pthread_t thread; pthread_create(&thread, NULL, start_thread, NULL); atomic_puts("EXIT-SUCCESS"); sleep(1000); return 0; } rr-4.1.0/src/test/async_kill_with_threads_thread_running.run000066400000000000000000000005441265436462100244330ustar00rootroot00000000000000source `dirname $0`/util.sh # SIGKILL, wait 6.0s # The timeout must be greater than rr's alarm threshold of 3s, because # the spinning-thread may not do any conditional branches so we rely # on the alarm fallback to ensure there's a context switch to the main # thread to print EXIT-SUCCESS. 
record_async_signal 9 6.0 $TESTNAME replay check 'EXIT-SUCCESS' rr-4.1.0/src/test/async_segv.c000066400000000000000000000010221265436462100162160ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void handle_segv(int sig) { test_assert(SIGSEGV == sig); atomic_puts("EXIT-SUCCESS"); exit(0); } int main(void) { int dummy = 0, i; signal(SIGSEGV, handle_segv); /* No syscalls after here! (Up to the assert.) */ for (i = 1; i < (1 << 30); ++i) { dummy += (dummy + i) % 9735; } /* It's possible for SEGV to be delivered too late, so succeed anyway */ atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/async_segv.run000066400000000000000000000001601265436462100166020ustar00rootroot00000000000000source `dirname $0`/util.sh # SIGSEGV, wait 2.0s record_async_signal 11 2.0 $TESTNAME replay check EXIT-SUCCESS rr-4.1.0/src/test/async_segv_ignored.c000066400000000000000000000003351265436462100177330ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(void) { signal(SIGSEGV, SIG_IGN); kill(getpid(), SIGSEGV); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/async_signal_syscalls.c000066400000000000000000000027101265436462100204510ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static sig_atomic_t caught_usr1; static void handle_usr1(int sig) { test_assert(SIGUSR1 == sig); atomic_puts("caught usr1"); caught_usr1 = 1; } static void* do_thread(__attribute__((unused)) void* p) { while (1) { sched_yield(); } return NULL; } int main(int argc, char* argv[]) { struct timespec ts; struct timeval tv; int num_its; int i; pthread_t thread; /* Create an extra thread so context switches can happen and SCHED events will be recorded. */ pthread_create(&thread, NULL, do_thread, NULL); test_assert(argc == 2); num_its = atoi(argv[1]); test_assert(num_its > 0); atomic_printf("Running 2^%d iterations\n", num_its); signal(SIGUSR1, handle_usr1); /* Driver scripts choose the number of iterations based on * their needs. */ for (i = 0; i < 1 << num_its; ++i) { /* The odds of the signal being caught in the library * implementing these syscalls is very high. But even * if it's not caught there, this test will pass. */ clock_gettime(CLOCK_MONOTONIC, &ts); gettimeofday(&tv, NULL); clock_gettime(CLOCK_MONOTONIC, &ts); gettimeofday(&tv, NULL); clock_gettime(CLOCK_MONOTONIC, &ts); gettimeofday(&tv, NULL); clock_gettime(CLOCK_MONOTONIC, &ts); gettimeofday(&tv, NULL); if (caught_usr1) { break; } } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/async_signal_syscalls.run000066400000000000000000000006641265436462100210410ustar00rootroot00000000000000source `dirname $0`/util.sh # Without the syscallbuf, trying to record the large number of # syscalls in this test is impractical. skip_if_no_syscall_buf # 2^17 iterations is arbitrarily chosen to take ~3s on a fast machine record $TESTNAME 17 # Because of issue #184, replay takes longer than practical. So for # now we'll skip it and hope other tests exercise the relevant code # well enough. 
#replay #check 'EXIT-SUCCESS' passed rr-4.1.0/src/test/async_signal_syscalls2.c000066400000000000000000000034721265436462100205410ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define ITERATION_COUNT 10 static int usr1_count = 0; static int usr2_count = 0; static int alrm_count = 0; static volatile int done; static int ready_fds[2]; static void handle_signal(int sig) { switch (sig) { case SIGUSR1: usr1_count++; break; case SIGUSR2: usr2_count++; break; case SIGALRM: alrm_count++; break; default: test_assert(0); break; } test_assert(1 == write(ready_fds[1], "K", 1)); } static void* thread_start(__attribute__((unused)) void* p) { struct timespec ts = { 0, 1000 }; sigset_t mask; int i; sigemptyset(&mask); sigaddset(&mask, SIGUSR1); sigaddset(&mask, SIGUSR2); sigaddset(&mask, SIGALRM); test_assert(0 == pthread_sigmask(SIG_BLOCK, &mask, NULL)); for (i = 0; i < ITERATION_COUNT; ++i) { char buf[3]; int count = 3; nanosleep(&ts, NULL); if (i > 0) { while (count > 0) { int n = read(ready_fds[0], buf, count); test_assert(n > 0); count -= n; } } kill(getpid(), SIGUSR1); kill(getpid(), SIGUSR2); kill(getpid(), SIGALRM); } done = 1; return NULL; } int main(void) { struct timespec ts; pthread_t thread; int fd; char buf[10]; test_assert(0 == pipe(ready_fds)); fd = open("/dev/zero", O_RDONLY); signal(SIGUSR1, handle_signal); signal(SIGUSR2, handle_signal); signal(SIGALRM, handle_signal); pthread_create(&thread, NULL, thread_start, NULL); while (!done) { clock_gettime(CLOCK_MONOTONIC, &ts); read(fd, buf, sizeof(buf)); } test_assert(usr1_count == ITERATION_COUNT); test_assert(usr2_count == ITERATION_COUNT); test_assert(alrm_count == ITERATION_COUNT); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/async_signal_syscalls_100.run000066400000000000000000000007071265436462100214170ustar00rootroot00000000000000source `dirname $0`/util.sh # It's relatively easy to reproduce a CPUID divergence caused by lack # of CPU binding. GLOBAL_OPTIONS="$GLOBAL_OPTIONS_BIND_CPU" # Ensure that the test records some SCHED interrupt events. timeslice=100 RECORD_ARGS="-c$timeslice" # 2^9 iterations is arbitrarily chosen to record and replay < 15s, # both with and without the syscallbuf, on a fast machine. record async_signal_syscalls$bitness 9 replay check 'EXIT-SUCCESS' rr-4.1.0/src/test/async_signal_syscalls_1000.run000066400000000000000000000005111265436462100214700ustar00rootroot00000000000000source `dirname $0`/util.sh # Ensure that the test records some USR_SCHED interrupt events. timeslice=1000 RECORD_ARGS="-c$timeslice" # 2^9 iterations is arbitrarily chosen to record and replay < 15s, # both with and without the syscallbuf, on a fast machine. 
record async_signal_syscalls$bitness 9 replay check 'EXIT-SUCCESS' rr-4.1.0/src/test/async_signal_syscalls_siginfo.c000066400000000000000000000040151265436462100221670ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static sig_atomic_t caught_usr1; static siginfo_t siginfo; #define MAGIC_NUMBER 0x7654321 /* positive */ static void handle_usr1(int sig, siginfo_t* si, __attribute__((unused)) void* context) { test_assert(SIGUSR1 == sig); test_assert(si->si_code == siginfo.si_code); test_assert(si->si_pid == siginfo.si_pid); test_assert(si->si_uid == siginfo.si_uid); test_assert(si->si_value.sival_int == siginfo.si_value.sival_int); caught_usr1 = 1; atomic_puts("caught usr1"); } static void* thread_start(__attribute__((unused)) void* p) { usleep(1000); siginfo.si_code = SI_QUEUE; siginfo.si_pid = getpid(); siginfo.si_uid = geteuid(); siginfo.si_value.sival_int = MAGIC_NUMBER; syscall(SYS_rt_tgsigqueueinfo, getpid(), getpid(), SIGUSR1, &siginfo); return NULL; } int main(int argc, char* argv[]) { struct timespec ts; struct timeval tv; int num_its; int i; struct sigaction sa; pthread_t thread; test_assert(argc == 2); num_its = atoi(argv[1]); test_assert(num_its > 0); atomic_printf("Running 2^%d iterations\n", num_its); sa.sa_flags = SA_SIGINFO; sigemptyset(&sa.sa_mask); sa.sa_sigaction = handle_usr1; sigaction(SIGUSR1, &sa, NULL); pthread_create(&thread, NULL, thread_start, NULL); /* Driver scripts choose the number of iterations based on * their needs. */ for (i = 0; i < 1 << num_its; ++i) { /* The odds of the signal being caught in the library * implementing these syscalls is very high. But even * if it's not caught there, this test will pass. */ clock_gettime(CLOCK_MONOTONIC, &ts); gettimeofday(&tv, NULL); clock_gettime(CLOCK_MONOTONIC, &ts); gettimeofday(&tv, NULL); clock_gettime(CLOCK_MONOTONIC, &ts); gettimeofday(&tv, NULL); clock_gettime(CLOCK_MONOTONIC, &ts); gettimeofday(&tv, NULL); } pthread_join(thread, NULL); test_assert(caught_usr1); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/async_signal_syscalls_siginfo.run000066400000000000000000000006641265436462100225570ustar00rootroot00000000000000source `dirname $0`/util.sh # Without the syscallbuf, trying to record the large number of # syscalls in this test is impractical. skip_if_no_syscall_buf # 2^17 iterations is arbitrarily chosen to take ~3s on a fast machine record $TESTNAME 17 # Because of issue #184, replay takes longer than practical. So for # now we'll skip it and hope other tests exercise the relevant code # well enough. #replay #check 'EXIT-SUCCESS' passed rr-4.1.0/src/test/async_usr1.c000066400000000000000000000011341265436462100161500ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static sig_atomic_t caught_usr1; static void handle_usr1(int sig) { test_assert(SIGUSR1 == sig); caught_usr1 = 1; atomic_puts("caught usr1"); } static void breakpoint(void) { int break_here = 1; (void)break_here; } int main(void) { int dummy = 0, i; signal(SIGUSR1, handle_usr1); breakpoint(); /* NO SYSCALLS AFTER HERE! (Up to the assert.) 
*/ for (i = 1; !caught_usr1 && i < (1 << 30); ++i) { dummy += (dummy + i) % 9735; } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/async_usr1.run000066400000000000000000000001621265436462100165320ustar00rootroot00000000000000source `dirname $0`/util.sh # SIGUSR1, wait 2.0s record_async_signal 10 2.0 $TESTNAME replay check 'EXIT-SUCCESS' rr-4.1.0/src/test/at_threadexit.c000066400000000000000000000010461265436462100167100ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static pthread_key_t exit_key; static void thread_exit(__attribute__((unused)) void* data) { atomic_puts("thread exit"); } static void* thread(__attribute__((unused)) void* unused) { pthread_key_create(&exit_key, thread_exit); pthread_setspecific(exit_key, (void*)0x1); pthread_exit(NULL); } int main(void) { pthread_t t; pthread_create(&t, NULL, thread, NULL); pthread_join(t, NULL); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/backtrace_syscall.py000066400000000000000000000002771265436462100177470ustar00rootroot00000000000000from rrutil import * send_gdb('b __kernel_vsyscall') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('bt') expect_gdb(r'#0 [^_]*__kernel_vsyscall \(\)') ok() rr-4.1.0/src/test/backtrace_syscall.run000066400000000000000000000001121265436462100201070ustar00rootroot00000000000000source `dirname $0`/util.sh record simple$bitness debug backtrace_syscall rr-4.1.0/src/test/bad_breakpoint.run000066400000000000000000000003221265436462100174050ustar00rootroot00000000000000source `dirname $0`/util.sh record simple$bitness for i in $(seq 15 25); do echo Replaying to event $i ... debug restart_finish "-g $i" if [[ "$leave_data" == "y" ]]; then break fi done rr-4.1.0/src/test/bad_good_break.py000066400000000000000000000004371265436462100171760ustar00rootroot00000000000000from rrutil import * send_gdb('b bad_breakpoint') expect_gdb('Breakpoint 1') send_gdb('b good_breakpoint') expect_gdb('Breakpoint 2') send_gdb('c') # If we hit bad_breakpoint, then we never continue and never reach # good_breakpoint. 
expect_gdb('Breakpoint 2, good_breakpoint') ok() rr-4.1.0/src/test/bad_ip.c000066400000000000000000000007501265436462100153020ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void sighandler(int sig, siginfo_t* si, void* utp) { test_assert(SIGSEGV == sig && si->si_addr == (void*)0x42); atomic_puts("EXIT-SUCCESS"); _exit(0); } int main(void) { struct sigaction act; act.sa_sigaction = sighandler; sigemptyset(&act.sa_mask); act.sa_flags = SA_SIGINFO; sigaction(SIGSEGV, &act, NULL); __asm__ __volatile__("call 0x42"); return 0; } rr-4.1.0/src/test/bad_syscall.c000066400000000000000000000005021265436462100163370ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int ret = syscall(-10); test_assert(-1 == ret && ENOSYS == errno); ret = syscall(9999); test_assert(-1 == ret && ENOSYS == errno); atomic_puts("EXIT-SUCCESS"); return ret; } rr-4.1.0/src/test/barrier.c000066400000000000000000000031721265436462100155130ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) { int break_here = 1; (void)break_here; } static void hit_barrier(void) { int break_here = 1; (void)break_here; } static void joined_threads(void) { int break_here = 1; (void)break_here; } static void set_thread_name(int id) { char name_buf[16]; snprintf(name_buf, sizeof(name_buf), "BP-THREAD-%d", id); prctl(PR_SET_NAME, name_buf); } struct thread_data { int threadno; pthread_barrier_t* bar; }; static void* thread(void* datap) { struct thread_data* data = datap; pthread_barrier_t* bar = data->bar; set_thread_name(data->threadno); atomic_printf("thread %d launched with data %p\n", data->threadno, data); breakpoint(); pthread_barrier_wait(bar); pthread_barrier_wait(bar); atomic_printf("thread %d done\n", data->threadno); free(data); return NULL; } int main(int argc, char* argv[]) { struct timeval tv; pthread_barrier_t bar; pthread_t threads[10]; int i; /* (Kick on the syscallbuf lib.) */ gettimeofday(&tv, NULL); pthread_barrier_init(&bar, NULL, 1 + ALEN(threads)); set_thread_name(1); for (i = 0; i < ALEN(threads); ++i) { struct thread_data* data = calloc(1, sizeof(*data)); data->threadno = i + 2; data->bar = &bar; pthread_create(&threads[i], NULL, thread, data); } pthread_barrier_wait(&bar); hit_barrier(); pthread_barrier_wait(&bar); atomic_puts("main done"); for (i = 0; i < ALEN(threads); ++i) { pthread_join(threads[i], NULL); } joined_threads(); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/basic_test.run000066400000000000000000000000661265436462100165660ustar00rootroot00000000000000source `dirname $0`/util.sh compare_test EXIT-SUCCESS rr-4.1.0/src/test/big_buffers.c000066400000000000000000000042541265436462100163440ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define DUMMY_FILE "dummy.txt" #define BUF_SIZE (1 << 24) int main(int argc, char* argv[]) { struct timeval ts; char* buf; int fd; int sockfds[2]; ssize_t nread; gettimeofday(&ts, NULL); buf = malloc(BUF_SIZE); socketpair(AF_LOCAL, SOCK_STREAM, 0, sockfds); /* Big read() buffer. 
*/ fd = creat(DUMMY_FILE, 0600); write(fd, "foo", 3); close(fd); fd = open(DUMMY_FILE, O_RDONLY); unlink(DUMMY_FILE); nread = read(fd, buf, BUF_SIZE); atomic_printf("read %zu bytes: %s\n", nread, buf); test_assert(3 == nread && !strcmp(buf, "foo")); /* Big recv() buffer. */ write(sockfds[0], "bar", 3); nread = recv(sockfds[1], buf, BUF_SIZE, 0); atomic_printf("recv'd %zu bytes: %s\n", nread, buf); test_assert(3 == nread && !strcmp(buf, "bar")); /* Big recvfrom() buffer. */ write(sockfds[0], "baz", 3); nread = recvfrom(sockfds[1], buf, BUF_SIZE, 0, NULL, NULL); atomic_printf("recvfrom'd %zu bytes: %s\n", nread, buf); test_assert(3 == nread && !strcmp(buf, "baz")); { struct mmsghdr mmsg = { { 0 } }; struct iovec data = { 0 }; mmsg.msg_hdr.msg_iov = &data; mmsg.msg_hdr.msg_iovlen = 1; /* Big recvmsg() buffer. */ data.iov_base = "foo"; data.iov_len = 3; test_assert(3 <= sendmsg(sockfds[0], &mmsg.msg_hdr, 0)); data.iov_base = buf; data.iov_len = BUF_SIZE; nread = recvmsg(sockfds[1], &mmsg.msg_hdr, 0); atomic_printf("recvmsg'd %zu bytes: %s\n", nread, buf); test_assert(3 <= nread && !strcmp(buf, "foo")); /* Big recvmmsg() buffer. */ data.iov_base = "bar"; data.iov_len = 3; test_assert(1 == sendmmsg(sockfds[0], &mmsg, 1, 0)); data.iov_base = buf; data.iov_len = BUF_SIZE; test_assert(1 == recvmmsg(sockfds[1], &mmsg, 1, 0, NULL)); nread = mmsg.msg_len; atomic_printf("recvmmsg'd %zu bytes: %s\n", nread, buf); test_assert(3 <= nread && !strcmp(buf, "bar")); } /* TODO: tests for epoll_wait() / poll() / select() (/ * prctl()?), which are much less likely to have buffers big * enough to overflow scratch. */ atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/block.c000066400000000000000000000377461265436462100151750ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #include #define CTRLMSG_LEN CMSG_LEN(sizeof(int)) struct sendmmsg_arg { int sockfd; struct mmsghdr* msgvec; unsigned int vlen; unsigned int flags; }; struct recvmmsg_arg { int sockfd; struct mmsghdr* msgvec; unsigned int vlen; unsigned int flags; struct timespec* timeout; }; struct select_arg { int n_fds; fd_set* read; fd_set* write; fd_set* except; struct timeval* timeout; }; static void breakpoint(void) { int break_here = 1; (void)break_here; } static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; static int sockfds[2]; static const int msg_magic = 0x1337beef; const ssize_t num_sockbuf_bytes = 1 << 20; static void* reader_thread(void* dontcare) { char token = '!'; int sock = sockfds[1]; struct timeval ts; char c = '\0'; int i; gettimeofday(&ts, NULL); atomic_puts("r: acquiring mutex ..."); pthread_mutex_lock(&lock); atomic_puts("r: ... releasing mutex"); pthread_mutex_unlock(&lock); for (i = 0; i < 2; ++i) { atomic_puts("r: reading socket ..."); gettimeofday(&ts, NULL); test_assert(1 == read(sock, &c, sizeof(c))); atomic_printf("r: ... read '%c'\n", c); test_assert(c == token); ++token; } /* TODO: readv() support */ atomic_puts("r: recv'ing socket ..."); gettimeofday(&ts, NULL); test_assert(1 == recv(sock, &c, sizeof(c), 0)); atomic_printf("r: ... recv'd '%c'\n", c); test_assert(c == token); ++token; atomic_puts("r: recvfrom'ing socket ..."); test_assert(1 == recvfrom(sock, &c, sizeof(c), 0, NULL, NULL)); atomic_printf("r: ... 
recvfrom'd '%c'\n", c); test_assert(c == token); ++token; { struct sockaddr_un addr; socklen_t addrlen = sizeof(addr); atomic_puts("r: recvfrom(&sock)'ing socket ..."); test_assert(1 == recvfrom(sock, &c, sizeof(c), 0, &addr, &addrlen)); atomic_printf("r: ... recvfrom'd '%c' from sock len:%d\n", c, addrlen); test_assert(c == token); /* socketpair() AF_LOCAL sockets don't identify * themselves. */ test_assert(addrlen == 0); ++token; } { struct mmsghdr mmsg = { { 0 } }; struct iovec data = { 0 }; int magic = ~msg_magic; int err, ret; data.iov_base = &magic; data.iov_len = sizeof(magic); mmsg.msg_hdr.msg_iov = &data; mmsg.msg_hdr.msg_iovlen = 1; struct cmsghdr* cmptr = (struct cmsghdr*)malloc(CTRLMSG_LEN); mmsg.msg_hdr.msg_control = cmptr; mmsg.msg_hdr.msg_controllen = CTRLMSG_LEN; atomic_puts("r: recvmsg with DONTWAIT ..."); ret = recvmsg(sock, &mmsg.msg_hdr, MSG_DONTWAIT); err = errno; atomic_printf("r: ... returned %d (%s/%d)\n", ret, strerror(err), err); test_assert(-1 == ret); test_assert(EWOULDBLOCK == err); test_assert(mmsg.msg_hdr.msg_iov == &data); atomic_puts("r: recvmsg'ing socket ..."); test_assert(0 < recvmsg(sock, &mmsg.msg_hdr, 0)); atomic_printf("r: ... recvmsg'd 0x%x\n", magic); test_assert(msg_magic == magic); test_assert(mmsg.msg_hdr.msg_iov == &data); int fd; memcpy(&fd, CMSG_DATA(cmptr), sizeof(fd)); struct stat fs_new, fs_old; fstat(fd, &fs_new); fstat(STDERR_FILENO, &fs_old); // check that the control msg was sent successfully test_assert( fs_old.st_dev == fs_new.st_dev && fs_old.st_ino == fs_new.st_ino && fs_old.st_uid == fs_new.st_uid && fs_old.st_gid == fs_new.st_gid && fs_old.st_rdev == fs_new.st_rdev && fs_old.st_size == fs_new.st_size); magic = ~msg_magic; atomic_puts("r: recvmmsg'ing socket ..."); breakpoint(); test_assert(1 == recvmmsg(sock, &mmsg, 1, 0, NULL)); atomic_printf("r: ... recvmmsg'd 0x%x (%u bytes)\n", magic, mmsg.msg_len); test_assert(msg_magic == magic); test_assert(mmsg.msg_hdr.msg_iov == &data); magic = ~msg_magic; #if defined(SYS_socketcall) struct recvmmsg_arg arg = { 0 }; arg.sockfd = sock; arg.msgvec = &mmsg; arg.vlen = 1; test_assert(1 == syscall(SYS_socketcall, SYS_RECVMMSG, (void*)&arg)); #elif defined(SYS_recvmmsg) test_assert(1 == syscall(SYS_recvmmsg, sock, &mmsg, 1, 0, NULL)); #else #error unable to call recvmmsg #endif atomic_printf("r: ... recvmmsg'd(by socketcall) 0x%x (%u bytes)\n", magic, mmsg.msg_len); test_assert(msg_magic == magic); free(cmptr); } { struct msghdr msg = { 0 }; struct iovec iovs[2]; char c1 = '\0', c2 = '\0'; iovs[0].iov_base = &c1; iovs[0].iov_len = sizeof(c1); iovs[1].iov_base = &c2; iovs[1].iov_len = sizeof(c2); msg.msg_iov = iovs; msg.msg_iovlen = sizeof(iovs) / sizeof(iovs[0]); atomic_puts("r: recvmsg'ing socket with two iovs ..."); test_assert(2 == recvmsg(sock, &msg, 0)); atomic_printf("r: ... recvmsg'd '%c' and '%c'\n", c1, c2); test_assert(c1 == token); token++; test_assert(c2 == token); token++; } { struct pollfd pfd; atomic_puts("r: polling socket ..."); pfd.fd = sock; pfd.events = POLLIN; gettimeofday(&ts, NULL); poll(&pfd, 1, -1); atomic_puts("r: ... done, doing nonblocking read ..."); test_assert(1 == read(sock, &c, sizeof(c))); atomic_printf("r: ... read '%c'\n", c); test_assert(c == token); ++token; } { struct pollfd pfd; atomic_puts("r: polling socket ..."); pfd.fd = sock; pfd.events = POLLIN; gettimeofday(&ts, NULL); ppoll(&pfd, 1, NULL, NULL); atomic_puts("r: ... done, doing nonblocking read ..."); test_assert(1 == read(sock, &c, sizeof(c))); atomic_printf("r: ... 
read '%c'\n", c); test_assert(c == token); ++token; } { fd_set fds; const struct timeval infinity = { 1 << 30, 0 }; struct timeval tv = infinity; int ret; atomic_puts("r: select()ing socket ..."); FD_ZERO(&fds); FD_SET(sock, &fds); #if defined(__i386__) struct select_arg arg = { 0 }; arg.n_fds = sock + 1; arg.read = &fds; arg.write = NULL; arg.except = NULL; arg.timeout = &tv; ret = syscall(SYS_select, &arg); #else ret = syscall(SYS_select, sock + 1, &fds, NULL, NULL, &tv); #endif atomic_printf("r: ... returned %d; tv { %ld, %ld }\n", ret, tv.tv_sec, tv.tv_usec); test_assert(1 == ret); test_assert(FD_ISSET(sock, &fds)); test_assert(0 < tv.tv_sec && tv.tv_sec < infinity.tv_sec); atomic_puts("r: ... done, doing nonblocking read ..."); test_assert(1 == read(sock, &c, sizeof(c))); atomic_printf("r: ... read '%c'\n", c); test_assert(c == token); ++token; } { fd_set fds; const struct timeval infinity = { 1 << 30, 0 }; struct timeval tv = infinity; int ret; atomic_puts("r: select()ing socket ..."); FD_ZERO(&fds); FD_SET(sock, &fds); ret = select(sock + 1, &fds, NULL, NULL, &tv); atomic_printf("r: ... returned %d; tv { %ld, %ld }\n", ret, tv.tv_sec, tv.tv_usec); test_assert(1 == ret); test_assert(FD_ISSET(sock, &fds)); test_assert(0 < tv.tv_sec && tv.tv_sec < infinity.tv_sec); atomic_puts("r: ... done, doing nonblocking read ..."); test_assert(1 == read(sock, &c, sizeof(c))); atomic_printf("r: ... read '%c'\n", c); test_assert(c == token); ++token; } { int epfd; struct epoll_event ev; atomic_puts("r: epolling socket ..."); test_assert(0 <= (epfd = epoll_create(1 /*num events*/))); ev.events = EPOLLIN; ev.data.fd = sock; gettimeofday(&ts, NULL); test_assert(0 == epoll_ctl(epfd, EPOLL_CTL_ADD, ev.data.fd, &ev)); test_assert(1 == epoll_wait(epfd, &ev, 1, -1)); atomic_puts("r: ... done, doing nonblocking read ..."); test_assert(sock == ev.data.fd); test_assert(1 == epoll_wait(epfd, &ev, 1, -1)); test_assert(1 == read(sock, &c, sizeof(c))); atomic_printf("r: ... read '%c'\n", c); test_assert(c == token); ++token; close(epfd); } { char* buf = (char*)malloc(num_sockbuf_bytes); ssize_t nwritten = 0; struct iovec iov; ++token; memset(buf, token, num_sockbuf_bytes); atomic_printf("r: writing outbuf of size %zd ...\n", num_sockbuf_bytes); while (nwritten < num_sockbuf_bytes) { ssize_t this_write = write(sock, buf, num_sockbuf_bytes - nwritten); atomic_printf("r: wrote %zd bytes this time\n", this_write); nwritten += this_write; } ++token; memset(buf, token, num_sockbuf_bytes); iov.iov_base = buf; iov.iov_len = num_sockbuf_bytes; atomic_printf("r: writev()ing outbuf of size %zd ...\n", num_sockbuf_bytes); while (iov.iov_len > 0) { ssize_t this_write = writev(sock, &iov, 1); atomic_printf("r: wrote %zd bytes this time\n", this_write); iov.iov_len -= this_write; } free(buf); } atomic_puts("r: reading socket with masked signals ..."); { sigset_t old_mask, mask; sigfillset(&mask); test_assert(0 == pthread_sigmask(SIG_BLOCK, &mask, &old_mask)); test_assert(1 == read(sock, &c, sizeof(c))); test_assert(0 == pthread_sigmask(SIG_SETMASK, &old_mask, NULL)); } ++token; atomic_printf("r: ... 
read '%c'\n", c); test_assert(c == token); /* Make the main thread wait on our join() */ atomic_puts("r: sleeping ..."); usleep(500000); return NULL; } static void read_all_chunks(int sock, char* buf, ssize_t num_sockbuf_bytes, char token) { ssize_t nread = 0; while (nread < num_sockbuf_bytes) { char* this_buf = buf + nread; ssize_t this_read = read(sock, this_buf, num_sockbuf_bytes - nread); int i; atomic_printf("M: read %zd bytes this time,\n", this_read); test_assert(this_read > 0); /* XXX: we would like to assert that the written data * was read in more than one chunk, which should imply * that at least one write() from the other thread * blocked, but it's possible for multiple write()s to * complete and fill the read buffer here before the * reader returns. */ /*test_assert(this_read < num_sockbuf_bytes);*/ for (i = nread; i < nread + this_read; ++i) { if (token != buf[i]) { atomic_printf("M: byte %d should be '%c', but is '%c'\n", i, token, buf[i]); } } nread += this_read; atomic_printf("M: %zd total so far\n", nread); } } int main(int argc, char* argv[]) { char token = '!'; struct timeval ts; pthread_t reader; int sock; gettimeofday(&ts, NULL); socketpair(AF_LOCAL, SOCK_STREAM, 0, sockfds); sock = sockfds[0]; pthread_mutex_lock(&lock); pthread_create(&reader, NULL, reader_thread, NULL); /* Make the reader thread wait on its pthread_mutex_lock() */ atomic_puts("M: sleeping ..."); usleep(500000); atomic_puts("M: unlocking mutex ..."); pthread_mutex_unlock(&lock); atomic_puts("M: ... done"); /* Force a wait on read() */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: writing '%c' to socket ...\n", token); test_assert(1 == write(sock, &token, sizeof(token))); ++token; atomic_puts("M: ... done"); /* Force a wait on readv() */ { struct iovec v = {.iov_base = &token, .iov_len = sizeof(token) }; atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("r: writev('%c')'ing socket ...\n", token); test_assert(1 == writev(sock, &v, 1)); ++token; atomic_puts("M: ... done"); } /* Force a wait on recv() */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: sending '%c' to socket ...\n", token); send(sock, &token, sizeof(token), 0); ++token; atomic_puts("M: ... done"); /* Force a wait on recvfrom() */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: sending '%c' to socket ...\n", token); send(sock, &token, sizeof(token), 0); ++token; atomic_puts("M: ... done"); /* Force a wait on recvfrom(&sock) */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: sending '%c' to socket ...\n", token); send(sock, &token, sizeof(token), 0); ++token; atomic_puts("M: ... done"); { struct mmsghdr mmsg = { { 0 } }; struct iovec data = { 0 }; int magic = msg_magic; data.iov_base = &magic; data.iov_len = sizeof(magic); mmsg.msg_hdr.msg_iov = &data; mmsg.msg_hdr.msg_iovlen = 1; struct cmsghdr* cmptr = (struct cmsghdr*)malloc(CTRLMSG_LEN); // send a fd cmptr->cmsg_level = SOL_SOCKET; cmptr->cmsg_type = SCM_RIGHTS; cmptr->cmsg_len = CTRLMSG_LEN; mmsg.msg_hdr.msg_control = cmptr; mmsg.msg_hdr.msg_controllen = CTRLMSG_LEN; { const int fd = STDERR_FILENO; memcpy(CMSG_DATA(cmptr), &fd, sizeof(fd)); // send stderr as fd } /* Force a wait on recvmsg() */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: sendmsg'ing 0x%x to socket ...\n", msg_magic); sendmsg(sock, &mmsg.msg_hdr, 0); atomic_puts("M: ... 
done"); /* Force a wait on recvmmsg() */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: sendmmsg'ing 0x%x to socket ...\n", msg_magic); breakpoint(); sendmmsg(sock, &mmsg, 1, 0); atomic_printf("M: ... sent %u bytes\n", mmsg.msg_len); /* Force a wait on recvmmsg() */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: sendmmsg'ing(by socketcall) 0x%x to socket ...\n", msg_magic); #if defined(SYS_socketcall) struct sendmmsg_arg arg = { 0 }; arg.sockfd = sock; arg.msgvec = &mmsg; arg.vlen = 1; syscall(SYS_socketcall, SYS_SENDMMSG, (void*)&arg); #elif defined(SYS_sendmmsg) syscall(SYS_sendmmsg, sock, &mmsg, 1, 0); #else #error unable to call sendmmsg #endif free(cmptr); } { struct msghdr msg = { 0 }; struct iovec iovs[2]; char c1 = token++; char c2 = token++; iovs[0].iov_base = &c1; iovs[0].iov_len = sizeof(c1); iovs[1].iov_base = &c2; iovs[1].iov_len = sizeof(c2); msg.msg_iov = iovs; msg.msg_iovlen = sizeof(iovs) / sizeof(iovs[0]); /* Force a wait on recvmsg(). */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: writing { '%c', '%c' } to socket ...\n", c1, c2); test_assert(2 == sendmsg(sock, &msg, 0)); atomic_puts("M: ... done"); } /* Force a wait on poll() */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: writing '%c' to socket ...\n", token); write(sock, &token, sizeof(token)); ++token; atomic_puts("M: ... done"); /* Force a wait on ppoll() */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: writing '%c' to socket ...\n", token); write(sock, &token, sizeof(token)); ++token; atomic_puts("M: ... done"); /* Force a wait on select(), raw syscall */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: writing '%c' to socket ...\n", token); write(sock, &token, sizeof(token)); ++token; atomic_puts("M: ... done"); /* Force a wait on select(), library call */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: writing '%c' to socket ...\n", token); write(sock, &token, sizeof(token)); ++token; atomic_puts("M: ... done"); /* Force a wait on epoll_wait() */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: writing '%c' to socket ...\n", token); write(sock, &token, sizeof(token)); ++token; atomic_puts("M: ... done"); /* Force a wait on write() */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: reading socket ...\n"); ++token; { char* buf = (char*)malloc(num_sockbuf_bytes); int i; for (i = 0; i < 2; ++i) { read_all_chunks(sock, buf, num_sockbuf_bytes, token); ++token; } free(buf); } atomic_puts("M: ... done"); /* Force a wait on read() */ atomic_puts("M: sleeping again ..."); usleep(500000); atomic_printf("M: writing '%c' to socket ...\n", token); write(sock, &token, sizeof(token)); atomic_puts("M: ... 
done"); pthread_join(reader, NULL); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/block_intr_sigchld.c000066400000000000000000000041511265436462100177060ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define NUM_ITERATIONS 10 #define NUM_PROCS_PER_ITERATION 10 #define MAGIC_EXIT_CODE 42 static int sockfds[2]; static const int msg_magic = 0x1337beef; const ssize_t num_sockbuf_bytes = 1 << 20; static void child_proc(void) { exit(MAGIC_EXIT_CODE); } static void* writer_thread(void* dontcare) { char token = '!'; int sock = sockfds[1]; int i; for (i = 0; i < NUM_ITERATIONS; ++i) { /* Force a wait on read() */ atomic_printf("w: iteration %d: sleeping ...\n", i); usleep(500000); atomic_printf("w: writing '%c' to socket ...\n", token); write(sock, &token, sizeof(token)); ++token; atomic_puts("w: ... done"); } atomic_puts("w: ... done"); return NULL; } int main(int argc, char* argv[]) { char token = '!'; char c = '\0'; pthread_t t; int sock; int i; socketpair(AF_LOCAL, SOCK_STREAM, 0, sockfds); sock = sockfds[0]; pthread_create(&t, NULL, writer_thread, NULL); for (i = 0; i < NUM_ITERATIONS; ++i) { pid_t procs[NUM_PROCS_PER_ITERATION]; int j; atomic_printf("M: iteration %d: forking processes before read ...\n", i); for (j = 0; j < NUM_PROCS_PER_ITERATION; ++j) { if (0 == (procs[j] = fork())) { child_proc(); test_assert("Not reached" && 0); } } atomic_printf("M: sleeping for a bit ..."); usleep(10000); atomic_printf("M: reading socket ...\n"); test_assert(1 == read(sock, &c, sizeof(c))); atomic_printf("M: ... read '%c'\n", c); test_assert(c == token); ++token; for (j = 0; j < NUM_PROCS_PER_ITERATION; ++j) { int status; int child = procs[j]; int pid = waitpid(child, &status, 0); int err = errno; atomic_printf("M: waitpid(%d) returns %d(%s) and status %#x\n", child, pid, strerror(err), status); test_assert(child == pid); test_assert(WIFEXITED(status) && MAGIC_EXIT_CODE == WEXITSTATUS(status)); } } atomic_printf("M: ... done\n"); pthread_join(t, NULL); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/block_intr_sigchld.run000066400000000000000000000006671265436462100203000ustar00rootroot00000000000000source `dirname $0`/util.sh # The underlying failure here is difficult to reproduce without a lot # of tracee concurrency. RECORD_ARGS="-e 1" # TODO: when the syscallbuf is disabled, this test intermittently fails # with an error that looks like # # -> Failed to waitpid(12282) # # It's not understood why this happens, and it's not seen anywhere else, # so we disable the test for now. 
skip_if_no_syscall_buf compare_test EXIT-SUCCESS rr-4.1.0/src/test/blocked_bad_ip.c000066400000000000000000000015541265436462100167700ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void fault_handler(int sig, siginfo_t* si, void* context) { atomic_puts("FAILED: handler should not have been called for blocked signal"); } static void* start_thread(void* p) { sigset_t s; syscall(SYS_write, STDOUT_FILENO, "EXIT-", 5); sigemptyset(&s); sigaddset(&s, SIGSEGV); sigprocmask(SIG_BLOCK, &s, NULL); syscall(SYS_write, STDOUT_FILENO, "SUCCESS\n", 8, 9, 10, 11); __asm__ __volatile__("jmp 0x42"); return NULL; } int main(int argc, char* argv[]) { struct sigaction act; pthread_t thread; act.sa_sigaction = fault_handler; act.sa_flags = SA_SIGINFO | SA_NODEFER; sigemptyset(&act.sa_mask); sigaction(SIGSEGV, &act, NULL); pthread_create(&thread, NULL, start_thread, NULL); pthread_join(thread, NULL); return 0; } rr-4.1.0/src/test/blocked_bad_ip.py000066400000000000000000000003011265436462100171630ustar00rootroot00000000000000from rrutil import * send_gdb('c') expect_rr('EXIT-SUCCESS') expect_gdb('SIGSEGV') send_gdb('reverse-stepi') expect_gdb('SIGSEGV') send_gdb('reverse-stepi') expect_gdb('start_thread') ok() rr-4.1.0/src/test/blocked_bad_ip.run000066400000000000000000000000471265436462100173460ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/blocked_sigsegv.c000066400000000000000000000014101265436462100172100ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void fault_handler(int sig, siginfo_t* si, void* context) { atomic_puts("FAILED: handler should not have been called for blocked signal"); } static void* start_thread(void* p) { sigset_t s; sigemptyset(&s); sigaddset(&s, SIGSEGV); sigprocmask(SIG_BLOCK, &s, NULL); atomic_puts("EXIT-SUCCESS"); *(int*)NULL = 0; return NULL; } int main(int argc, char* argv[]) { struct sigaction act; pthread_t thread; act.sa_sigaction = fault_handler; act.sa_flags = SA_SIGINFO | SA_NODEFER; sigemptyset(&act.sa_mask); sigaction(SIGSEGV, &act, NULL); pthread_create(&thread, NULL, start_thread, NULL); pthread_join(thread, NULL); return 0; } rr-4.1.0/src/test/break_block.py000066400000000000000000000002421265436462100165240ustar00rootroot00000000000000from rrutil import * send_gdb('b breakpoint') expect_gdb('Breakpoint 1') for i in xrange(2): send_gdb('c') expect_gdb('Breakpoint 1, breakpoint') ok() rr-4.1.0/src/test/break_block.run000066400000000000000000000001031265436462100166740ustar00rootroot00000000000000source `dirname $0`/util.sh record block$bitness debug break_block rr-4.1.0/src/test/break_clock.py000066400000000000000000000002421265436462100165250ustar00rootroot00000000000000from rrutil import * send_gdb('b breakpoint') expect_gdb('Breakpoint 1') for i in xrange(3): send_gdb('c') expect_gdb('Breakpoint 1, breakpoint') ok() rr-4.1.0/src/test/break_clock.run000066400000000000000000000001031265436462100166750ustar00rootroot00000000000000source `dirname $0`/util.sh record clock$bitness debug break_clock rr-4.1.0/src/test/break_clone.py000066400000000000000000000002421265436462100165320ustar00rootroot00000000000000from rrutil import * send_gdb('b breakpoint') expect_gdb('Breakpoint 1') for i in xrange(3): send_gdb('c') expect_gdb('Breakpoint 1, breakpoint') ok() 
rr-4.1.0/src/test/break_clone.run000066400000000000000000000001031265436462100167020ustar00rootroot00000000000000source `dirname $0`/util.sh record clone$bitness debug break_clone rr-4.1.0/src/test/break_exec.run000066400000000000000000000001111265436462100165250ustar00rootroot00000000000000source `dirname $0`/util.sh record exec_self$bitness debug generic_break rr-4.1.0/src/test/break_int3.py000066400000000000000000000002041265436462100163050ustar00rootroot00000000000000from rrutil import * send_gdb('b int3.c:3') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1, breakpoint') ok() rr-4.1.0/src/test/break_int3.run000066400000000000000000000002561265436462100164700ustar00rootroot00000000000000source `dirname $0`/util.sh fails "gdb and rr don't yet agree what should happen when a breakpoint is set on a breakpoint instruction" record int3$bitness debug break_int3 rr-4.1.0/src/test/break_mmap_private.py000066400000000000000000000002421265436462100201160ustar00rootroot00000000000000from rrutil import * send_gdb('b breakpoint') expect_gdb('Breakpoint 1') for i in xrange(3): send_gdb('c') expect_gdb('Breakpoint 1, breakpoint') ok() rr-4.1.0/src/test/break_mmap_private.run000066400000000000000000000001211265436462100202660ustar00rootroot00000000000000source `dirname $0`/util.sh record mmap_private$bitness debug break_mmap_private rr-4.1.0/src/test/break_msg.run000066400000000000000000000001031265436462100163700ustar00rootroot00000000000000source `dirname $0`/util.sh record msg$bitness debug generic_break rr-4.1.0/src/test/break_rdtsc.run000066400000000000000000000001051265436462100167230ustar00rootroot00000000000000source `dirname $0`/util.sh record rdtsc$bitness debug generic_break rr-4.1.0/src/test/break_sigreturn.run000066400000000000000000000001121265436462100176240ustar00rootroot00000000000000source `dirname $0`/util.sh record intr_sleep$bitness debug generic_break rr-4.1.0/src/test/break_sync_signal.run000066400000000000000000000001101265436462100201110ustar00rootroot00000000000000source `dirname $0`/util.sh record segfault$bitness debug generic_break rr-4.1.0/src/test/break_thread.run000066400000000000000000000001071265436462100170550ustar00rootroot00000000000000source `dirname $0`/util.sh record barrier$bitness debug generic_break rr-4.1.0/src/test/break_time_slice.py000066400000000000000000000002631265436462100175520ustar00rootroot00000000000000from rrutil import * send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1, breakpoint') send_gdb('c') expect_gdb('exited normally') ok() rr-4.1.0/src/test/break_time_slice.run000066400000000000000000000001131265436462100177200ustar00rootroot00000000000000source `dirname $0`/util.sh record chew_cpu$bitness debug break_time_slice rr-4.1.0/src/test/breakpoint.c000066400000000000000000000006441265436462100162240ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void C(void) { atomic_puts("in C"); } static void B(void) { atomic_puts("calling C"); C(); atomic_puts("finished C"); } static void A(void) { atomic_puts("calling B"); B(); atomic_puts("finished B"); } int main(void) { atomic_puts("calling A"); A(); atomic_puts("finished A"); return 0; } rr-4.1.0/src/test/breakpoint.py000066400000000000000000000003361265436462100164300ustar00rootroot00000000000000from rrutil import * send_gdb('b C') expect_gdb('Breakpoint 1') send_gdb('c') expect_rr('calling C') expect_gdb('Breakpoint 1, C') send_gdb('bt') 
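# The regex below checks the backtrace frame by frame: #0 must be C,
# #1 B, #2 A and #3 main, i.e. the call chain main -> A -> B -> C set up
# in breakpoint.c; the [^X]+ pieces just skip the addresses and
# file/line noise gdb prints between frames.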
expect_gdb('#0[^C]+C[^#]+#1[^B]+B[^#]+#2[^A]+A[^#]+#3[^m]+main') ok() rr-4.1.0/src/test/breakpoint.run000066400000000000000000000000471265436462100166030ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/breakpoint_conditions.c000066400000000000000000000010661265436462100204540ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpointA(int v4) { int break_here = 1; (void)break_here; } static void breakpointB(int v4) { int break_here = 1; (void)break_here; } int v0 = 0; int v1 = 1; int v2 = 2; int v3 = 3; int vm1 = -1; int vm2 = -2; uint64_t u64max = (uint64_t)(int64_t)-1; int* p = (int*)&u64max; int main(int argc, char* argv[]) { int i; for (i = 0; i < 10000; ++i) { breakpointA(4); breakpointB(4); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/breakpoint_conditions.py000066400000000000000000000017441265436462100206650ustar00rootroot00000000000000from rrutil import * def test_cond(c): send_gdb('cond 1 %s'%c) # check that the condition is evaluated correctly by checking that # we don't break on the negation of the condition send_gdb('cond 2 !(%s)'%c) send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('b breakpointA') expect_gdb('Breakpoint 1') send_gdb('b breakpointB') expect_gdb('Breakpoint 2') test_cond('v1==1') test_cond('v1!=2') test_cond('v4==4') test_cond('v1+v2==3') test_cond('v2-1==v1') test_cond('v3-v2==1') test_cond('v4>>2==v1') test_cond('v1<<2==v4') test_cond('(unsigned char)u64max==255') test_cond('v2*v2==4') test_cond('v4/v2==2') test_cond('v4/vm2==-2') test_cond('v3%v2==1') test_cond('v3%vm2==1') test_cond('!v1==v0') test_cond('v1|v2==3') test_cond('v3&v2==2') test_cond('v3^v2==1') test_cond('~v0==(int)u64max') test_cond('v0?v1:v2==2') test_cond('*p==(int)u64max') test_cond('*(unsigned char*)p==255') test_cond('*(short int*)p==-1') test_cond('*(long long*)p==(long long)u64max') ok() rr-4.1.0/src/test/breakpoint_conditions.run000066400000000000000000000000471265436462100210340ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/breakpoint_consistent.py000066400000000000000000000005061265436462100207000ustar00rootroot00000000000000from rrutil import * send_gdb('b C') expect_gdb('Breakpoint 1') send_gdb('b main') expect_gdb('Breakpoint 2') send_gdb('c') expect_gdb('Breakpoint 2, main') send_gdb('cond 1 rand()&1 < 10') send_gdb('c') expect_rr('calling C') expect_gdb('Breakpoint 1, C') send_gdb('check') send_gdb('c') expect_rr('finished C') ok() rr-4.1.0/src/test/breakpoint_consistent.run000066400000000000000000000001221265436462100210460ustar00rootroot00000000000000source `dirname $0`/util.sh record breakpoint$bitness debug breakpoint_consistent rr-4.1.0/src/test/breakpoint_overlap.c000066400000000000000000000041111265436462100177450ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" /* This is a difficult bug to trigger because we need to set a breakpoint where a SCHED event will stop, and the breakpoint has to fire exactly at the moment the SCHED event fires. So we need a SCHED event to fire at a location when it's the first time we've executed that location. Setting the context switch time to something small-ish like -c100 should help. Then we generate a lot of conditional branches. 
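   For example (mirroring breakpoint_overlap.run below, where 3 and 4
   are just the a/b seeds parsed in main):

     RECORD_ARGS="-c100"
     record breakpoint_overlap$bitness 3 4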
*/ #define STATEMENT(i)                                                      \ if (a * (i) < b) {                                                       \ ++a;                                                                   \ } else {                                                                 \ ++b;                                                                   \ } #define STATEMENT2(i) STATEMENT(i) STATEMENT(i + 1) #define STATEMENT4(i) STATEMENT2(i) STATEMENT2(i + 2) #define STATEMENT8(i) STATEMENT4(i) STATEMENT4(i + 4) #define STATEMENT16(i) STATEMENT8(i) STATEMENT8(i + 8) #define STATEMENT32(i) STATEMENT16(i) STATEMENT16(i + 16) #define STATEMENT64(i) STATEMENT32(i) STATEMENT32(i + 32) #define STATEMENT128(i) STATEMENT64(i) STATEMENT64(i + 64) #define STATEMENT256(i) STATEMENT128(i) STATEMENT128(i + 128) #define STATEMENT512(i) STATEMENT256(i) STATEMENT256(i + 256) #define STATEMENT1024(i) STATEMENT512(i) STATEMENT512(i + 512) #define STATEMENT2048(i) STATEMENT1024(i) STATEMENT1024(i + 1024) #define STATEMENT4096(i) STATEMENT2048(i) STATEMENT2048(i + 2048) static void* do_thread(void* p) { while (1) { sched_yield(); } return NULL; } int main(int argc, char** argv) { int a = atoi(argv[1]); int b = atoi(argv[2]); pthread_t thread; /* Create an extra thread so context switches can happen and SCHED events will be recorded. */ pthread_create(&thread, NULL, do_thread, NULL); /* This syscall signals the test that we're in the test body proper */ getgid(); STATEMENT4096(0) return a + b; } rr-4.1.0/src/test/breakpoint_overlap.py000066400000000000000000000022161265436462100201570ustar00rootroot00000000000000import collections import sys import re from rrutil import * arch = get_exe_arch() ArchInfo = collections.namedtuple('ArchInfo', ['syscall', 'ip_name']) regex_info = { 'i386': ArchInfo('getgid32', 'eip'), 'i386:x86-64': ArchInfo('getgid', 'rip'), } syscall_re = re.compile("`SYSCALL: %s' \\(state:EXITING_SYSCALL\\)" % regex_info[arch].syscall) sched_re = re.compile("`SCHED'") eip_re = re.compile("%s:(0x[a-f0-9]+)" % regex_info[arch].ip_name) sched_enabled = False eip_enabled = False eip = None while True: line = sys.stdin.readline() if not line: break if syscall_re.search(line): sched_enabled = True if sched_enabled and sched_re.search(line): eip_enabled = True if eip_enabled: m = eip_re.search(line) if m: eip = m.group(1) break if eip is None: failed('%s not found' % regex_info[arch].ip_name) send_gdb('b *%s'%eip) expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') expect_gdb('(rr)') send_gdb('p/x *(char*)$pc') expect_gdb('0x([a-f0-9]+)') if last_match().group(1) == 'cc': failed('saw breakpoint at current instruction') ok() rr-4.1.0/src/test/breakpoint_overlap.run000066400000000000000000000006641265436462100203400ustar00rootroot00000000000000source `dirname $0`/util.sh # It's relatively easy to reproduce a CPUID divergence caused by lack # of CPU binding.
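# Binding record and replay to the same CPU (GLOBAL_OPTIONS_BIND_CPU is
# presumably defined by util.sh) keeps CPUID results identical across
# both runs.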
GLOBAL_OPTIONS="$GLOBAL_OPTIONS_BIND_CPU" RECORD_ARGS="-c100" record breakpoint_overlap$bitness 3 4 # Don't use pipes here since we need 'debug' to run in the same bash process rr --suppress-environment-warnings dump $workdir/latest-trace > $workdir/plaintext-trace debug breakpoint_overlap < $workdir/plaintext-trace rr-4.1.0/src/test/brk.c000066400000000000000000000006351265436462100146440ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { void* prev; void* start = sbrk(0); test_assert((intptr_t)start != -1); test_assert(start == sbrk(111)); memset(start, 0xaa, 111); prev = sbrk(1000000); test_assert(prev != (void*)-1); test_assert(0 == brk(prev)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/brk2.c000066400000000000000000000012501265436462100147200ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { void* start = (void*)syscall(SYS_brk, 0); void* p = (void*)syscall(SYS_brk, start + 5000); int res; void* pp; void* q; void* r; res = mprotect((void*)(((long)start + 4095) & ~(long)4095), 4096, PROT_READ); test_assert(res == 0); pp = (void*)syscall(SYS_brk, 0); test_assert(pp == p); *(char*)p = 77; q = (void*)syscall(SYS_brk, p + 5000); test_assert(p + 5000 == q); test_assert(*(char*)p == 77); r = (void*)syscall(SYS_brk, start + 1); test_assert(start < r); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/call_exit.py000066400000000000000000000003711265436462100162350ustar00rootroot00000000000000from rrutil import * send_gdb('b main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1, main') send_gdb('call exit(0)') expect_gdb('while in a function called from GDB') restart_replay() expect_gdb('Breakpoint 1, main') ok() rr-4.1.0/src/test/call_exit.run000066400000000000000000000001021265436462100164010ustar00rootroot00000000000000source `dirname $0`/util.sh record simple$bitness debug call_exit rr-4.1.0/src/test/call_function.c000066400000000000000000000030311265436462100166770ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int var; static void breakpoint(void) { int break_here = 1; (void)break_here; } static void mutate_var(void) { var = 22; atomic_printf("var is %d\n", var); } static void print_nums(void) { int i; for (i = 1; i <= 5; ++i) { atomic_printf("%d ", i); } atomic_puts(""); } static void alloc_and_print(void) { static const int num_bytes = 4096; char* str = mmap(NULL, num_bytes, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); snprintf(str, num_bytes, "Hello %d", var); atomic_puts(str); munmap(str, num_bytes); } static void make_unhandled_syscall(void) { ssize_t ret = kill(getpid(), SIGKILL); /* XXX the error return is somewhat arbitrary here, but as * long as |splice()| remains unimplemented in experiment * mode, it's reasonable to assume that the libc wrapper will * return -1 back to us. 
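 * (Note: the reference to splice() above is stale -- the syscall
 * actually issued below is kill(getpid(), SIGKILL); the test only
 * relies on it returning -1 to the caller.)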
*/ atomic_printf("return from kill: %zd\n", ret); } static void print_time(void) { struct timespec ts = { -1, -1 }; double now_sec; clock_gettime(CLOCK_MONOTONIC, &ts); now_sec = (double)ts.tv_sec + (double)ts.tv_nsec / 1e9; atomic_printf("now is %g sec\n", now_sec); } int main(int argc, char* argv[]) { var = -42; breakpoint(); atomic_printf("var is %d\n", var); test_assert(var == -42); atomic_puts("EXIT-SUCCESS"); return 0; /* not reached */ mutate_var(); print_nums(); alloc_and_print(); make_unhandled_syscall(); print_time(); } rr-4.1.0/src/test/call_function.py000066400000000000000000000007631265436462100171160ustar00rootroot00000000000000from rrutil import * send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1, breakpoint') send_gdb('call mutate_var()') expect_gdb('var is 22') send_gdb('call print_nums()') expect_gdb('1 2 3 4 5') send_gdb('call alloc_and_print()') expect_gdb('Hello 22') send_gdb('call make_unhandled_syscall()') expect_gdb('return from kill: -1') send_gdb('call print_time()') expect_gdb(r'now is \d+(\.\d+(e\+\d\d)?)? sec') send_gdb('c') expect_rr('var is -42') ok() rr-4.1.0/src/test/call_function.run000066400000000000000000000000471265436462100172650ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/capget.c000066400000000000000000000016421265436462100153300ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" extern int capget(cap_user_header_t header, cap_user_data_t data); int main(int argc, char* argv[]) { struct __user_cap_header_struct* hdr; struct __user_cap_data_struct* data; ALLOCATE_GUARD(hdr, 'a'); hdr->version = 0; hdr->pid = 0; test_assert(0 == capget(hdr, NULL)); test_assert(hdr->version > 0); VERIFY_GUARD(hdr); ALLOCATE_GUARD(hdr, 'a'); hdr->version = _LINUX_CAPABILITY_VERSION_1; hdr->pid = 0; ALLOCATE_GUARD(data, 'b'); test_assert(0 == capget(hdr, data)); VERIFY_GUARD(hdr); VERIFY_GUARD(data); ALLOCATE_GUARD(hdr, 'c'); hdr->version = _LINUX_CAPABILITY_VERSION_3; hdr->pid = 0; data = allocate_guard(sizeof(*data) * 2, 'd'); test_assert(0 == capget(hdr, data)); VERIFY_GUARD(hdr); verify_guard(sizeof(*data) * 2, data); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/check_patched_pthread.py000066400000000000000000000004421265436462100205440ustar00rootroot00000000000000from rrutil import * import re send_gdb('b main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('p ((int*)&__elision_aconf)[2]') expect_gdb(re.compile(r'= 0|No symbol')) send_gdb('p/x *(char*)elision_init') expect_gdb(re.compile(r'= 0xc3|No symbol')) ok() rr-4.1.0/src/test/check_patched_pthread.run000066400000000000000000000001171265436462100207170ustar00rootroot00000000000000source `dirname $0`/util.sh record threads$bitness debug check_patched_pthread rr-4.1.0/src/test/check_syscall_perf_interval.py000066400000000000000000000027651265436462100220270ustar00rootroot00000000000000import sys import re if len(sys.argv) < 4: print '''Usage: %s <syscall> <counter> <expected-count> Exits with status 0 if exactly the expected number of perf events occur between every pair of consecutive system calls of the given type.''' % sys.argv[0] sys.exit(2) syscall = sys.argv[1] counter = sys.argv[2] expected_count = int(sys.argv[3]) last_perfctr_value = -1 syscall_re = re.compile("`SYSCALL: (\\w+)' \\(state:0\\)") perfctr_re = re.compile(counter + ":(\\d+)") while True: line = sys.stdin.readline() if not line: sys.exit(0) m = syscall_re.search(line) if m: if 
m.group(1) == syscall: line = sys.stdin.readline() m = perfctr_re.search(line) if m: v = int(m.group(1)) if last_perfctr_value >= 0 and v - last_perfctr_value != expected_count: print "Mismatch: saw %d %ss between %ss (from %d to %d), expected %d" % \ (v - last_perfctr_value, counter, syscall, last_perfctr_value, v, expected_count) sys.exit(1) last_perfctr_value = v else: # Ignore nonconsecutive syscalls. In the cpuid test, we have # two batches of geteuid32s; one injected by rr itself to detect # a buggy system, and a separate one for the test. We need to # ignore the geteuid32 pair that spans the gap between the batches. last_perfctr_value = -1 rr-4.1.0/src/test/checkpoint_async_signal_syscalls_1000.run000066400000000000000000000011731265436462100237040ustar00rootroot00000000000000source `dirname $0`/util.sh timeslice=1000 RECORD_ARGS="-c$timeslice" record async_signal_syscalls$bitness 9 num_events=$(count_events) # This recording has a large number of events, and it's impractical to # run the debugger for each one. The original bug reproduces when the # debugger attaches to pretty much any event past event 350, so we # somewhat arbitrarily choose a stride that reduces the number of # debug sessions by about 10x. stride=80 for i in $(seq 1 $stride $num_events); do echo Checkpointing at event $i ... debug restart_finish "-g $i" if [[ "$leave_data" == "y" ]]; then break fi done rr-4.1.0/src/test/checkpoint_dying_threads.c000066400000000000000000000025431265436462100211210ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) {} static int thread_to_main_fds[2]; static int main_to_child_fds[2]; static int wait_forever_fds[2]; static char ch = 'X'; static void* run_thread(void* p) { test_assert(1 == write(thread_to_main_fds[1], &ch, 1)); read(wait_forever_fds[0], &ch, 1); test_assert(0); return NULL; } static int run_child(void) { test_assert(1 == read(main_to_child_fds[0], &ch, 1)); /* At this point, the parent's main thread should have exit_group()ed and its extra thread should have died but not been scheduled yet. Try to take a checkpoint in this state. */ breakpoint(); atomic_puts("EXIT-SUCCESS"); return 0; } int main(int argc, char** argv) { pthread_t thread; pid_t child; test_assert(0 == pipe(thread_to_main_fds)); test_assert(0 == pipe(main_to_child_fds)); test_assert(0 == pipe(wait_forever_fds)); child = fork(); if (!child) { return run_child(); } atomic_printf("child %d\n", child); test_assert(0 == pthread_create(&thread, NULL, run_thread, NULL)); test_assert(1 == read(thread_to_main_fds[0], &ch, 1)); /* thread should have blocked on its wait-forever read. Tell the child to proceed after we exit_group. */ test_assert(1 == write(main_to_child_fds[1], &ch, 1)); return 0; } rr-4.1.0/src/test/checkpoint_dying_threads.py000066400000000000000000000003331265436462100213220ustar00rootroot00000000000000from rrutil import * send_gdb('break breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('checkpoint') send_gdb('c') expect_rr('EXIT-SUCCESS') expect_gdb('exited normally') ok() rr-4.1.0/src/test/checkpoint_dying_threads.run000066400000000000000000000003051265436462100214750ustar00rootroot00000000000000source `dirname $0`/util.sh record $TESTNAME TARGET_PID=$(grep 'child ' record.out | awk '{print $2}') echo Targeting recorded pid $TARGET_PID ... 
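# (The recorded program prints "child <pid>"; the grep/awk above scrape
# that pid out of record.out so that the debug session attaches to the
# forked child -- via "-f $TARGET_PID" below -- rather than to the
# parent, which has already exit_group()ed.)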
debug checkpoint_dying_threads "-f $TARGET_PID" rr-4.1.0/src/test/checkpoint_mixed_mode.c000066400000000000000000000013751265436462100204110ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int create_segment(size_t num_bytes) { char filename[] = "/dev/shm/rr-test-XXXXXX"; int fd = mkstemp(filename); unlink(filename); test_assert(fd >= 0); ftruncate(fd, num_bytes); return fd; } static void breakpoint(void) {} int main(int argc, char* argv[]) { size_t num_bytes = sysconf(_SC_PAGESIZE); int fd = create_segment(num_bytes); char* p = mmap(NULL, num_bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); char* shared_p; p[0] = 77; shared_p = mmap(NULL, num_bytes, PROT_READ, MAP_SHARED, fd, 0); breakpoint(); test_assert(p[0] == 77); test_assert(shared_p[0] == 0); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/checkpoint_mixed_mode.py000066400000000000000000000004411265436462100206100ustar00rootroot00000000000000from rrutil import * send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('checkpoint') expect_gdb('= 1') send_gdb('n') send_gdb('restart 1') expect_gdb('breakpoint') send_gdb('c') expect_rr('EXIT-SUCCESS') expect_gdb('xited normally') rr-4.1.0/src/test/checkpoint_mixed_mode.run000066400000000000000000000000471265436462100207660ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/checkpoint_mmap_shared.run000066400000000000000000000001041265436462100211260ustar00rootroot00000000000000source `dirname $0`/util.sh checkpoint_test mmap_shared$bitness 7 9 rr-4.1.0/src/test/checkpoint_prctl_name.run000066400000000000000000000001051265436462100207730ustar00rootroot00000000000000source `dirname $0`/util.sh checkpoint_test prctl_name$bitness 11 13 rr-4.1.0/src/test/checkpoint_simple.run000066400000000000000000000000771265436462100201500ustar00rootroot00000000000000source `dirname $0`/util.sh checkpoint_test simple$bitness 3 5 rr-4.1.0/src/test/chew_cpu.c000066400000000000000000000013021265436462100156530ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define NUM_ITERATIONS (1 << 30) static void breakpoint(void) { int break_here = 1; (void)break_here; } int spin(void) { int i, dummy = 0; atomic_puts("spinning"); /* NO SYSCALLS AFTER HERE: the point of this test is to hit * hpc interrupts to exercise the nonvoluntary interrupt * scheduler. 
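   * Any syscall in the loop would hand rr a voluntary switch point;
   * spinning on pure arithmetic forces rr to preempt us via its
   * performance-counter ("ticks") interrupt instead.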
*/ for (i = 1; i < NUM_ITERATIONS; ++i) { dummy += i % (1 << 20); dummy += i % (79 * (1 << 20)); if (i == NUM_ITERATIONS / 2) { breakpoint(); } } return dummy; } int main(int argc, char* argv[]) { atomic_printf("EXIT-SUCCESS dummy=%d\n", spin()); return 0; } rr-4.1.0/src/test/chown.c000066400000000000000000000030471265436462100152040ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define DUMMY_FILENAME "foo.txt" static gid_t get_gid(int fd) { struct stat* st; gid_t result; ALLOCATE_GUARD(st, 'x'); test_assert(0 == fstat(fd, st)); result = st->st_gid; FREE_GUARD(st); return result; } static void change_group(const char* path, gid_t new_gid) { test_assert(0 == chown(path, geteuid(), new_gid)); } static void change_group_fd(int fd, gid_t new_gid) { test_assert(0 == fchown(fd, geteuid(), new_gid)); } int main(int argc, char* argv[]) { gid_t groups[32]; int ngroups; gid_t this_group, other_group; int fd; this_group = getegid(); atomic_printf("Current group is %d\n", this_group); ngroups = getgroups(ALEN(groups), groups); test_assert(ngroups > 0); other_group = groups[0]; if (this_group == other_group && ngroups > 1) { other_group = groups[1]; } if (this_group == other_group) { atomic_puts("WARNING: unable to properly test chown()"); } fd = creat(DUMMY_FILENAME, 0600); test_assert(fd >= 0); atomic_printf("Group owner of %s is %d\n", DUMMY_FILENAME, get_gid(fd)); test_assert(this_group == get_gid(fd)); change_group(DUMMY_FILENAME, other_group); atomic_printf(" ... now owner is %d\n", get_gid(fd)); test_assert(other_group == get_gid(fd)); change_group_fd(fd, this_group); atomic_printf(" ... now back to original owner %d\n", get_gid(fd)); test_assert(this_group == get_gid(fd)); unlink(DUMMY_FILENAME); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/clock.c000066400000000000000000000024731265436462100151630ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) { int break_here = 1; (void)break_here; } int main(void) { struct timespec ts; struct timeval tv; int i; clock_getres(CLOCK_MONOTONIC, &ts); atomic_printf("Clock resolution is >= %g us\n", ((double)ts.tv_nsec) / 1.0e3); memset(&ts, 0, sizeof(ts)); memset(&tv, 0, sizeof(tv)); breakpoint(); for (i = 0; i < 100; ++i) { struct timespec ts_now; struct timeval tv_now; clock_gettime(CLOCK_MONOTONIC, &ts_now); test_assert(ts.tv_sec < ts_now.tv_sec || (ts.tv_sec == ts_now.tv_sec && ts.tv_nsec <= ts_now.tv_nsec)); ts = ts_now; if (i == 50) { breakpoint(); } /* technically gettimeofday() isn't monotonic, but the * value of this check is higher than the remote * possibility of a spurious failure */ gettimeofday(&tv_now, NULL); test_assert(tv.tv_sec < tv_now.tv_sec || (tv.tv_sec == tv_now.tv_sec && tv.tv_usec <= tv_now.tv_usec)); tv = tv_now; atomic_printf("cg: %g %llu, gtod: %g %llu\n", (double)ts.tv_sec, (long long int)ts.tv_nsec, (double)tv.tv_sec, (long long int)tv.tv_usec); } breakpoint(); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/clone.c000066400000000000000000000050511265436462100151630ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int futex(int* uaddr, int op, int val, const struct timespec* timeout, int* uaddr2, int val2) { return syscall(SYS_futex, uaddr, op, val, timeout, uaddr2, val2); } static pid_t child_tid; static pid_t child_tid_copy; static void 
breakpoint(void) { int break_here = 1; (void)break_here; } static int child(void* arg) { sigset_t set; /* Be careful in here. This thread was set up by a raw clone() call * without TLS support so many things won't work, e.g. atomic_printf. */ sigfillset(&set); /* NB: we have to naughtily make the linux assumption that * sigprocmask is per-task, because we're not a real * pthread. */ test_assert(0 == syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, &set, NULL, _NSIG / 8)); /* clone() should have set child_tid to our tid */ child_tid_copy = child_tid; breakpoint(); /* We cannot return normally here. Some clone() implementations call |_exit| after the clone function returns; some call SYS_exit. For consistency and correctness's sake, we need to call SYS_exit here. */ syscall(SYS_exit, 0); /* NOT REACHED */ return 0; } int main(int argc, char* argv[]) { const size_t stack_size = 1 << 20; void* stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); int pid; sigset_t set; sys_gettid(); /* NB: no syscalls in between the sys_gettid() above and this * clone(). */ breakpoint(); /* Warning: strace gets the parameter order wrong and will print child_tidptr as 0 here. */ pid = clone(child, stack + stack_size, CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_THREAD | CLONE_SIGHAND | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID, NULL, &child_tid, NULL, &child_tid); atomic_printf("clone()d pid: %d\n", pid); test_assert(pid > 0); futex(&child_tid, FUTEX_WAIT, pid, NULL, NULL, 0); test_assert(child_tid_copy == pid); /* clone() should have cleared child_tid now */ test_assert(child_tid == 0); sys_gettid(); sigfillset(&set); test_assert(0 == sigprocmask(SIG_BLOCK, &set, NULL)); /* NB: no syscalls in between the sys_gettid() above and this * clone(). */ breakpoint(); pid = clone(child, stack + stack_size, CLONE_SIGHAND /*must also have CLONE_VM*/, NULL, NULL, NULL); atomic_printf("clone(CLONE_SIGHAND)'d pid: %d\n", pid); test_assert(-1 == pid); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/clone_bad_stack.c000066400000000000000000000005601265436462100171560ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int status = -1; if (syscall(SYS_clone, SIGCHLD, (intptr_t)-1, (intptr_t)-1, (intptr_t)-1, (intptr_t)-1) == 0) { _exit(0); } test_assert(wait(&status) >= 0); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/clone_immediate_exit.c000066400000000000000000000007231265436462100202330ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void* run_thread(void* p) { atomic_puts("EXIT-SUCCESS"); exit(0); return NULL; } int main(int argc, char* argv[]) { pthread_t thread; pthread_create(&thread, NULL, run_thread, NULL); /* The signal will be delivered to the thread before any code runs in the thread. */ pthread_kill(thread, SIGCHLD); syscall(SYS_exit, 0); return 0; } rr-4.1.0/src/test/clone_interruption.c000066400000000000000000000031341265436462100200050ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void futex(int* uaddr, int op, int val) { /* Avoid using the rr-page syscall entrypoints, so we don't trigger any special treatment that might hide bugs. 
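 * (Compare the hand-rolled int $0x80 / syscall instructions below with
 * the generic #else fallback, which goes through libc's syscall()
 * wrapper and so may take exactly the entrypoints this test is trying
 * to avoid.)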
*/ #ifdef __x86_64__ __asm__("mov $0,%%r10\n\t" "syscall\n\t" ::"a"(SYS_futex), "D"(uaddr), "S"(op), "d"(val)); #elif defined(__i386__) __asm__("xchg %%ebx,%%edi\n\t" "int $0x80\n\t" "xchg %%ebx,%%edi\n\t" ::"a"(SYS_futex), "c"(op), "d"(val), "S"(NULL), "D"(uaddr)); #else syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0); #endif } static int thread_to_main_fds[2]; static void signal_handler(int sig) { char ch = 'X'; test_assert(sig == SIGCHLD); test_assert(1 == write(thread_to_main_fds[1], &ch, 1)); } static void* run_thread(void* p) { char ch = 'X'; int futex_val = 0; test_assert(SIG_ERR != signal(SIGCHLD, signal_handler)); test_assert(1 == write(thread_to_main_fds[1], &ch, 1)); futex(&futex_val, FUTEX_WAIT, 0); test_assert(0); return NULL; } int main(int argc, char* argv[]) { pthread_t thread; char ch; int i; sigset_t mask; test_assert(0 == pipe(thread_to_main_fds)); pthread_create(&thread, NULL, run_thread, NULL); sigemptyset(&mask); sigaddset(&mask, SIGCHLD); pthread_sigmask(SIG_SETMASK, &mask, NULL); test_assert(1 == read(thread_to_main_fds[0], &ch, 1)); for (i = 0; i < 1000; ++i) { geteuid(); } kill(getpid(), SIGCHLD); test_assert(1 == read(thread_to_main_fds[0], &ch, 1)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/clone_interruption.py000066400000000000000000000002731265436462100202140ustar00rootroot00000000000000import collections import sys import re from rrutil import * send_gdb('checkpoint') expect_gdb('= 1') send_gdb('n') send_gdb('restart 1') send_gdb('c') expect_rr('EXIT-SUCCESS') ok() rr-4.1.0/src/test/clone_interruption.run000066400000000000000000000005151265436462100203670ustar00rootroot00000000000000source `dirname $0`/util.sh RECORD_ARGS="-c100" record $TESTNAME # Don't use pipes here since we need 'debug' to run in the same bash process declare -i stop_at_event=`rr --suppress-environment-warnings dump $workdir/latest-trace | \ python2 $TESTDIR/clone_interruption_finder.py`+1 debug clone_interruption "-g $stop_at_event" rr-4.1.0/src/test/clone_interruption_finder.py000066400000000000000000000005601265436462100215420ustar00rootroot00000000000000import sys import re syscall_re = re.compile("`SYSCALL: futex' \\(state:EXITING_SYSCALL\\)") time_re = re.compile("global_time:(\d+)") futex_time = 999999999 while True: line = sys.stdin.readline() if not line: break if syscall_re.search(line): m = time_re.search(line) if m: futex_time = m.group(1) print futex_time rr-4.1.0/src/test/clone_untraced.c000066400000000000000000000010301265436462100170410ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int pipe_fds[2]; static int run_thread(void* p) { test_assert(1 == write(pipe_fds[1], ".", 1)); return 0; } int main(int argc, char* argv[]) { char* stack = (char*)malloc(65536) + 65536; int ret; char ch; test_assert(0 == pipe(pipe_fds)); ret = clone(run_thread, stack, CLONE_UNTRACED, NULL); test_assert(ret >= 0); test_assert(1 == read(pipe_fds[0], &ch, 1)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/clone_vfork.c000066400000000000000000000012051265436462100163670ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int clonefunc(void* exe) { execl(exe, exe, NULL); test_assert("Not reached" && 0); return 0; } int main(int argc, char* argv[]) { char child_stack[16384]; const char* exe; pid_t child; int status; test_assert(2 == argc); exe = argv[1]; child = clone(clonefunc, child_stack + 
sizeof(child_stack), CLONE_VFORK | SIGCHLD, (void*)exe); test_assert(child == waitpid(child, &status, 0)); test_assert(WIFEXITED(status) && 0 == WEXITSTATUS(status)); atomic_puts("clone-vfork-EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/clone_vfork.run000066400000000000000000000002021265436462100167450ustar00rootroot00000000000000source `dirname $0`/util.sh save_exe simple$bitness record $TESTNAME simple$bitness-$nonce replay check clone-vfork-EXIT-SUCCESS rr-4.1.0/src/test/conditional_breakpoint_calls.c000066400000000000000000000006121265436462100217600ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int counter; int dot_counter; int checker(void) { ++counter; return 0; } static void print_dot(void) { atomic_printf("."); ++dot_counter; } int main(int argc, char** argv) { int i; for (i = 0; i < 10; ++i) { print_dot(); } atomic_puts("\nEXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/conditional_breakpoint_calls.py000066400000000000000000000003351265436462100221700ustar00rootroot00000000000000import re from rrutil import * send_gdb('b print_dot if checker()') expect_gdb('Breakpoint 1') send_gdb('watch dot_counter if checker()') expect_gdb('Hardware watchpoint 2') send_gdb('c') expect_rr('EXIT-SUCCESS') ok() rr-4.1.0/src/test/conditional_breakpoint_calls.run000066400000000000000000000000471265436462100223440ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/conditional_breakpoint_offload.c000066400000000000000000000005351265436462100223000ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) { int break_here = 1; (void)break_here; } static int var; int main(int argc, char* argv[]) { int i; for (i = 0; i < 5000; ++i) { ++var; breakpoint(); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/conditional_breakpoint_offload.py000066400000000000000000000006171265436462100225070ustar00rootroot00000000000000from rrutil import * send_gdb('handle SIGKILL stop') send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('cond 1 var==-1') send_gdb('b main') expect_gdb('Breakpoint 2') send_gdb('c') expect_gdb('Breakpoint 2') send_gdb('c') # This should complete in a reasonable amount of time! expect_gdb('SIGKILL') send_gdb('reverse-continue') # And so should this! 
expect_gdb('Breakpoint 2') ok() rr-4.1.0/src/test/conditional_breakpoint_offload.run000066400000000000000000000000471265436462100226600ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/condvar_stress.c000066400000000000000000000046271265436462100171320ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define NUM_THREADS 10 #define NUM_TRIALS 1000 static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; static int last_written; static int trial; static int write_locked; static int done; static void* thread(void* idp) { int id = (intptr_t)idp; int num_loops = 0; int num_written = 0; while (1) { int this_write; ++num_loops; { pthread_mutex_lock(&lock); while (!done && (last_written == trial || write_locked)) { pthread_cond_wait(&cond, &lock); } if (done) { pthread_mutex_unlock(&lock); break; } write_locked = 1; this_write = trial; pthread_mutex_unlock(&lock); } atomic_printf("%d:%d(%d)\n", id, this_write, num_loops); ++num_written; { pthread_mutex_lock(&lock); last_written = this_write; write_locked = 0; pthread_cond_broadcast(&cond); pthread_mutex_unlock(&lock); } } atomic_printf(" (%d wrote %d)\n", id, num_written); pthread_exit((void*)(intptr_t)num_written); } int main(int argc, char* argv[]) { pthread_t threads[NUM_THREADS]; int i; int threads_num_written = 0; for (i = 0; i < NUM_THREADS; ++i) { test_assert(0 == pthread_create(&threads[i], NULL, thread, (void*)(intptr_t)i)); } for (i = 0; i < NUM_TRIALS; ++i) { { pthread_mutex_lock(&lock); assert(i == trial); test_assert(last_written == trial); ++trial; if (i % 2) { pthread_cond_signal(&cond); } else { pthread_cond_broadcast(&cond); } pthread_mutex_unlock(&lock); } { pthread_mutex_lock(&lock); while (last_written < trial) { pthread_cond_wait(&cond, &lock); } pthread_mutex_unlock(&lock); } } { pthread_mutex_lock(&lock); done = 1; pthread_cond_broadcast(&cond); pthread_mutex_unlock(&lock); } for (i = 0; i < NUM_THREADS; ++i) { void* ret = NULL; test_assert(0 == pthread_join(threads[i], &ret)); threads_num_written += (intptr_t)ret; } atomic_printf(" ... %d threads completed %d out of %d trials\n", NUM_THREADS, threads_num_written, NUM_TRIALS); test_assert(threads_num_written == NUM_TRIALS); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/condvar_stress.run000066400000000000000000000001741265436462100175050ustar00rootroot00000000000000source `dirname $0`/util.sh # Switch threads very eagerly on recorded events. 
RECORD_ARGS="-e1" compare_test EXIT-SUCCESS rr-4.1.0/src/test/constructor.c000066400000000000000000000002701265436462100164460ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" void lib_exit_success(void); int main(void) { lib_exit_success(); return 0; } rr-4.1.0/src/test/cont_signal.py000066400000000000000000000003361265436462100165720ustar00rootroot00000000000000from rrutil import * import re send_gdb('c') index = expect_list([re.compile(r'exited normally'), re.compile(r'Program received signal SIGUSR1')]) if index == 1: send_gdb('c') expect_gdb('exited normally') ok() rr-4.1.0/src/test/cont_signal.run000066400000000000000000000002271265436462100167450ustar00rootroot00000000000000source `dirname $0`/util.sh recorded_exe=async_usr1$bitness # SIGUSR1, wait 2.0s record_async_signal 10 2.0 $recorded_exe debug $TESTNAME_NO_BITNESS rr-4.1.0/src/test/cpuid.c000066400000000000000000000004371265436462100151720ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" extern int cpuid_loop(int iterations); int main(int argc, char** argv) { int sum; getegid(); sum = cpuid_loop(1000); atomic_printf("EXIT-SUCCESS; sum=%d\n", sum); return 0; } rr-4.1.0/src/test/cpuid.run000066400000000000000000000010031265436462100155420ustar00rootroot00000000000000source `dirname $0`/util.sh compare_test EXIT-SUCCESS if [[ $@ == -n ]]; then bin=$(which $TESTNAME) case $(file bin) in *x86-64*) syscall=geteuid ;; *i386*) syscall=geteuid32 ;; *) failed "can't determine architecture" ;; esac rr --suppress-environment-warnings dump $workdir/latest-trace | \ python2 $TESTDIR/check_syscall_perf_interval.py $syscall rbc 2 if [[ $? != 0 ]]; then failed "expected 2 rbcs between each geteuid32 syscall" fi fi rr-4.1.0/src/test/cpuid_loop.S000066400000000000000000000061311265436462100162000ustar00rootroot00000000000000#if defined(__i386__) .text .p2align 4,,15 .globl cpuid_call .type cpuid_call, @function cpuid_call: .cfi_startproc pushl %edi .cfi_def_cfa_offset 8 .cfi_offset 7, -8 pushl %esi .cfi_def_cfa_offset 12 .cfi_offset 6, -12 pushl %ebx .cfi_def_cfa_offset 16 .cfi_offset 3, -16 xorl %ebx, %ebx movl 20(%esp), %esi movl %ebx, %eax xchgl %ebx, %edi cpuid xchgl %ebx, %edi movl %eax, %ebx xorl %eax, %eax cmpl $4, %ebx jbe .L4 movl 16(%esp), %eax andl $4, %eax cpuid movl %eax, (%esi) movl 24(%esp), %eax movl %ebx, (%eax) movl (%esi), %eax .L4: popl %ebx .cfi_restore 3 .cfi_def_cfa_offset 12 popl %esi .cfi_restore 6 .cfi_def_cfa_offset 8 popl %edi .cfi_restore 7 .cfi_def_cfa_offset 4 ret .cfi_endproc .LFE89: .size cpuid_call, .-cpuid_call .p2align 4,,15 .globl cpuid_loop .type cpuid_loop, @function cpuid_loop: .cfi_startproc pushl %ebp .cfi_def_cfa_offset 8 .cfi_offset 5, -8 pushl %edi .cfi_def_cfa_offset 12 .cfi_offset 7, -12 pushl %esi .cfi_def_cfa_offset 16 .cfi_offset 6, -16 movl 16(%esp), %esi pushl %ebx .cfi_def_cfa_offset 20 .cfi_offset 3, -20 xorl %ebx, %ebx subl $44, %esp .cfi_def_cfa_offset 64 leal 28(%esp), %ebp leal 24(%esp), %edi .p2align 4,,7 .p2align 3 .L10: movl %ebx, (%esp) movl %ebp, 8(%esp) movl %edi, 4(%esp) call cpuid_call addl %eax, %ebx call geteuid@PLT subl $1, %esi jne .L10 addl $44, %esp .cfi_def_cfa_offset 20 movl %ebx, %eax popl %ebx .cfi_restore 3 .cfi_def_cfa_offset 16 popl %esi .cfi_restore 6 .cfi_def_cfa_offset 12 popl %edi .cfi_restore 7 .cfi_def_cfa_offset 8 popl %ebp .cfi_restore 5 .cfi_def_cfa_offset 4 ret .cfi_endproc .size cpuid_loop, .-cpuid_loop 
#elif defined(__x86_64__) .text .p2align 4,,15 .globl cpuid_call .type cpuid_call, @function cpuid_call: pushq %rbx /* Call CPUID twice, once under a conditional. */ xorl %eax, %eax cpuid cmpl $4, %eax jbe 1f movl $1, %eax cpuid 1: popq %rbx ret .size cpuid_call, .-cpuid_call .p2align 4,,15 .globl cpuid_loop .type cpuid_loop, @function cpuid_loop: mov %rdi, %rbx xor %r12d, %r12d 1: call cpuid_call addq %rax, %r12 call geteuid@PLT subq $1, %rbx jne 1b movq %r12, %rax ret .size cpuid_loop, .-cpuid_loop #else #error unknown CPU architecture #endif /* __i386__/__x86_64__ */ .section .note.GNU-stack,"",@progbits rr-4.1.0/src/test/crash.c000066400000000000000000000003361265436462100151640ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { volatile int* p = NULL; *p = 42; test_assert("Not reached" && 0); return 0; } rr-4.1.0/src/test/crash.run000066400000000000000000000001021265436462100155350ustar00rootroot00000000000000source `dirname $0`/util.sh record crash$bitness replay check '' rr-4.1.0/src/test/crash_in_function.c000066400000000000000000000003341265436462100175550ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" void crash(void) { *(int*)NULL = 0; } int main(int argc, char* argv[]) { atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/crash_in_function.py000066400000000000000000000011551265436462100177650ustar00rootroot00000000000000from rrutil import * send_gdb('handle SIGKILL stop') send_gdb('set unwindonsignal off') send_gdb('b main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('call crash()') expect_gdb('SIGSEGV') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('next') send_gdb('call crash()') expect_gdb('SIGSEGV') send_gdb('c') expect_gdb('SIGKILL') restart_replay() expect_gdb('Breakpoint 1') send_gdb('delete 1') send_gdb('set unwindonsignal on') send_gdb('call crash()') expect_gdb('SIGSEGV') send_gdb('next') send_gdb('call crash()') expect_gdb('SIGSEGV') send_gdb('c') expect_gdb('SIGKILL') ok() rr-4.1.0/src/test/crash_in_function.run000066400000000000000000000000471265436462100201400ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/creat_address_not_truncated.c000066400000000000000000000015321265436462100216170ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #include static const char dummy_filename[] = "dummy.txt"; int main(int argc, char* argv[]) { size_t page_size = sysconf(_SC_PAGESIZE); // Request an address where casting to int could corrupt the address on 64-bit // (i.e. not near the top or bottom of memory). uint8_t* map = mmap((void*)(LONG_MAX / 2), page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); // Copy the filename there, and try to use creat. If the address gets // truncated, this can cause a segmentation fault. 
memcpy(map, dummy_filename, sizeof(dummy_filename)); int fd = creat((const char*)map, 0600); close(fd); test_assert(access(dummy_filename, F_OK) == 0); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/dead_thread_target.py000066400000000000000000000017761265436462100200750ustar00rootroot00000000000000from rrutil import * import re send_gdb('b hit_barrier') expect_gdb('Breakpoint 1') send_gdb('b joined_threads') expect_gdb('Breakpoint 2') send_gdb('c') expect_gdb('Breakpoint 1, hit_barrier') send_gdb('info thr') expect_gdb('2 Thread') send_gdb('thr 2') expect_gdb('Switching to thread 2') send_gdb('c') # TODO: with the gdb in fedora 19, if a thread dies while it's the # resume target, then rr notifies gdb, but gdb doesn't ask for a new # thread list. This seems like a gdb bug, because we don't have any # other way to notify gdb of thread death, and the same code works # just fine in concurrent ubuntu and older versions. # # So we work around that problem by returning this special error code # to the user. Once gdb has made this mistake, the debugging session # is "stuck" because won't let any other threads continue. But at # least this error code tells the user that they need to restart the # session. expect_gdb(re.compile( r'Breakpoint 2, joined_threads|Remote failure reply: E10')) ok() rr-4.1.0/src/test/dead_thread_target.run000066400000000000000000000001141265436462100202320ustar00rootroot00000000000000source `dirname $0`/util.sh record barrier$bitness debug dead_thread_target rr-4.1.0/src/test/deliver_async_signal_during_syscalls.run000066400000000000000000000007421265436462100241200ustar00rootroot00000000000000source `dirname $0`/util.sh # See async_signal_syscalls.run for an explanation. skip_if_no_syscall_buf # SIGUSR1, wait 0.5s; do 2^30 iterations to give us effectively unlimited time # (the test exits when SIGUSR1 has been handled) record_async_signal 10 0.5 async_signal_syscalls$bitness 30 # Because of issue #184, replay takes longer than practical. So for # now we'll skip it and hope other tests exercise the relevant code # well enough. #replay #check 'EXIT-SUCCESS' passed rr-4.1.0/src/test/desched_blocking_poll.c000066400000000000000000000011741265436462100203620ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define NUM_PFDS 200 int main(int argc, char* argv[]) { int fds[2]; struct pollfd pfds[NUM_PFDS]; char ch = 'x'; int i; pipe(fds); for (i = 0; i < NUM_PFDS; ++i) { pfds[i].fd = fds[0]; pfds[i].events = POLLIN; } if (fork() == 0) { usleep(1000); write(fds[1], &ch, 1); return 0; } /* This should block */ test_assert(NUM_PFDS == poll(pfds, NUM_PFDS, -1)); test_assert(POLLIN & pfds[0].revents); test_assert(1 == read(pfds[0].fd, &ch, 1)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/desched_ticks.py000066400000000000000000000007331265436462100170670ustar00rootroot00000000000000from rrutil import * import re send_gdb('handle SIGKILL stop') send_gdb('b main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('b __before_poll_syscall_breakpoint') expect_gdb('Breakpoint 2') send_gdb('c') index = expect_list([re.compile(r'Breakpoint 2'), re.compile(r'SIGKILL')]) if index == 0: # This is testing that we can reverse-step without crashing rr. 
send_gdb('reverse-stepi') send_gdb('c') expect_gdb('Breakpoint 2') ok() rr-4.1.0/src/test/desched_ticks.run000066400000000000000000000003411265436462100172360ustar00rootroot00000000000000source `dirname $0`/util.sh # Without the syscallbuf, this test makes no sense # and will fail since it sets a breakpoint in the preload code. skip_if_no_syscall_buf record desched_blocking_poll$bitness debug desched_ticks rr-4.1.0/src/test/dup.c000066400000000000000000000023331265436462100146530ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static char buf[] = "0123456789"; int main(int argc, char* argv[]) { int pipe_fds[2]; int fd; char ch; test_assert(0 == pipe(pipe_fds)); test_assert(10 == write(pipe_fds[1], buf, 10)); fd = dup(pipe_fds[0]); test_assert(fd >= 0); test_assert(fd != pipe_fds[0] && fd != pipe_fds[1]); test_assert(1 == read(fd, &ch, 1)); test_assert(ch == '0'); fd = dup2(pipe_fds[0], 0); test_assert(fd == 0); test_assert(1 == read(fd, &ch, 1)); test_assert(ch == '1'); fd = dup3(pipe_fds[0], 49, O_CLOEXEC); test_assert(fd == 49); test_assert(1 == read(fd, &ch, 1)); test_assert(ch == '2'); test_assert(FD_CLOEXEC == fcntl(fd, F_GETFD)); test_assert(fd == dup2(0, fd)); test_assert(0 == fcntl(fd, F_GETFD)); fd = fcntl(pipe_fds[0], F_DUPFD, 49); test_assert(fd == 50); test_assert(1 == read(fd, &ch, 1)); test_assert(ch == '3'); test_assert(0 == fcntl(fd, F_GETFD)); fd = fcntl(pipe_fds[0], F_DUPFD_CLOEXEC, 49); test_assert(fd == 51); test_assert(1 == read(fd, &ch, 1)); test_assert(ch == '4'); test_assert(FD_CLOEXEC == fcntl(fd, F_GETFD)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/env_newline.run000066400000000000000000000001441265436462100167540ustar00rootroot00000000000000export BADENV='hello kitty' source `dirname $0`/util.sh compare_test EXIT-SUCCESS "" simple$bitness rr-4.1.0/src/test/epoll_create.c000066400000000000000000000004671265436462100165270ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int fd; fd = epoll_create(1); atomic_printf("New epoll file descriptor: %d\n", fd); if (fd >= 0) { atomic_puts("EXIT-SUCCESS"); } close(fd); return 0; } rr-4.1.0/src/test/epoll_create1.c000066400000000000000000000004701265436462100166020ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int fd; fd = epoll_create1(0); atomic_printf("New epoll file descriptor: %d\n", fd); if (fd >= 0) { atomic_puts("EXIT-SUCCESS"); } close(fd); return 0; } rr-4.1.0/src/test/exec_flags.c000066400000000000000000000022311265436462100161600ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void my_exec(const char* filename, const char** argv, const char** envp) { #ifdef __i386__ /* Use a special instruction after the syscall to make sure we don't patch it */ int out_bx; __asm__ __volatile__("xor %%ebx,%%ebx\n\t" "xchg %%ebx,%%edi\n\t" "int $0x80\n\t" "xchg %%ebx,%%edi\n\t" : "=b"(out_bx) : "a"(SYS_execve), "c"(argv), "d"(envp), "D"(filename)); #elif defined(__x86_64__) int out_bx; /* Use a special instruction after the syscall to make sure we don't patch it */ __asm__ __volatile__("xor %%ebx,%%ebx\n\t" "syscall\n\t" "xchg %%rdx,%%rdx\n\t" : "=b"(out_bx) : "a"(SYS_execve), "D"(filename), "S"(argv), "d"(envp)); #else #error 
Unknown architecture #endif } int main(int argc, const char* argv[], const char* envp[]) { my_exec("/no-exist!", argv, envp); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/exec_self.c000066400000000000000000000010401265436462100160120ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) { int break_here = 1; (void)break_here; } int main(int argc, char* argv[]) { test_assert(argc == 1 || (argc == 2 && !strcmp("self", argv[1]))); if (argc != 2) { atomic_printf("exec(%s, 'self') ...\n", argv[0]); breakpoint(); /* No syscalls in between here. */ execlp(argv[0], argv[0], "self", NULL); test_assert("Not reached" && 0); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/exec_stop.py000066400000000000000000000004301265436462100162600ustar00rootroot00000000000000from rrutil import * import re send_gdb('c') expect_gdb('stopped') send_gdb('stepi') expect_gdb('stopped') send_gdb('stepi') expect_gdb('stopped') send_gdb('c') expect_gdb('stopped') send_gdb('b execve') expect_gdb('Breakpoint 1') send_gdb('rc') expect_gdb('Breakpoint 1') ok() rr-4.1.0/src/test/exec_stop.run000066400000000000000000000001051265436462100164310ustar00rootroot00000000000000source `dirname $0`/util.sh record exec_self$bitness debug exec_stop rr-4.1.0/src/test/execp.run000066400000000000000000000002271265436462100155510ustar00rootroot00000000000000source `dirname $0`/util.sh exe=simple$bitness cp ${OBJDIR}/bin/$exe $exe-$nonce PATH="${PATH}:." just_record $exe-$nonce replay check 'EXIT-SUCCESS' rr-4.1.0/src/test/execve_loop.c000066400000000000000000000010331265436462100163700ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" /* The runner script bombards us with SIGCHLDs. It's quite likely that one of these will be received during an AutoRemoteSyscalls syscall, which is what we want to test here. */ int main(int argc, char* argv[], char* envp[]) { int count = atoi(argv[1]); if (count > 0) { char buf[10]; sprintf(buf, "%d", count - 1); argv[1] = buf; execve(argv[0], argv, envp); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/execve_loop.run000066400000000000000000000005261265436462100167570ustar00rootroot00000000000000source `dirname $0`/util.sh record $TESTNAME 100 & for i in $(seq 1 30); do sleep 0.01 kill -CHLD $rrpid $(pidof $TESTNAME-$nonce) >& /dev/null done # Wait for 'record' to actually terminate. Otherwise we might start # replaying before the trace file has been completely written. wait echo "Replaying ..."
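# (util.sh's replay/check helpers wrap, roughly, "rr replay -a" on the
#  trace recorded above and grep its output for the token. A hand-rolled
#  equivalent -- assuming the rr binary is on PATH -- would be approximately:
#    rr record ./execve_loop-$nonce 100 && rr replay -a
#  the $nonce suffix being how these scripts keep per-test binaries apart.)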
replay check 'EXIT-SUCCESS' rr-4.1.0/src/test/exit_group.c000066400000000000000000000007161265436462100162530ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" pthread_barrier_t bar; static void* thread(void* unused) { pthread_barrier_wait(&bar); sleep(-1); return NULL; } int main(int argc, char* argv[]) { pthread_t t; pthread_barrier_init(&bar, NULL, 2); pthread_create(&t, NULL, thread, NULL); pthread_barrier_wait(&bar); atomic_puts("_exit()ing"); _exit(0); return 0; /* not reached */ } rr-4.1.0/src/test/exit_group.run000066400000000000000000000000661265436462100166330ustar00rootroot00000000000000source `dirname $0`/util.sh compare_test '_exit()ing' rr-4.1.0/src/test/exit_status.c000066400000000000000000000002021265436462100164300ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(void) { return 7; } rr-4.1.0/src/test/exit_status.run000066400000000000000000000002151265436462100170160ustar00rootroot00000000000000source `dirname $0`/util.sh record exit_status$bitness if [[ $? != 7 ]]; then failed "got exit status $?, expected 7" else passed fi rr-4.1.0/src/test/explicit_checkpoint_clone.py000066400000000000000000000006541265436462100215050ustar00rootroot00000000000000from rrutil import * send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1, breakpoint') send_gdb('c') expect_gdb('Breakpoint 1, breakpoint') # Now we should be in the clone() thread. checkpoint here. send_gdb('checkpoint') expect_gdb('= 1') send_gdb('restart 1') expect_gdb('stopped') send_gdb('c') expect_gdb('Breakpoint 1, breakpoint') send_gdb('c') expect_gdb('EXIT-SUCCESS') ok() rr-4.1.0/src/test/explicit_checkpoint_clone.run000066400000000000000000000001211265436462100216460ustar00rootroot00000000000000source `dirname $0`/util.sh record clone$bitness debug explicit_checkpoint_clone rr-4.1.0/src/test/explicit_checkpoints.c000066400000000000000000000007711265436462100203020ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint2(void) { int break_here = 1; (void)break_here; } static void breakpoint3(void) { int break_here = 1; (void)break_here; } int main(int argc, char* argv[]) { /* NO SYSCALLS BETWEEN HERE AND RDTSC: next event for * replay must be rdtsc */ rdtsc(); breakpoint2(); atomic_printf("Write syscall...\n"); breakpoint3(); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/explicit_checkpoints.py000066400000000000000000000023141265436462100205030ustar00rootroot00000000000000from rrutil import * import re # Setup breakpoints send_gdb('b main') expect_gdb('Breakpoint 1') send_gdb('b breakpoint2') expect_gdb('Breakpoint 2') send_gdb('b breakpoint3') expect_gdb('Breakpoint 3') # Create checkpoint at each breakpoint send_gdb('c') expect_gdb('Breakpoint 1, main') send_gdb('checkpoint') index = expect_list([re.compile(r'= 1'), re.compile(r'ERROR')]) if index > 0: failed('ERROR detected in rr output') send_gdb('c') expect_gdb('Breakpoint 2, breakpoint2') send_gdb('checkpoint') expect_gdb('= 2') send_gdb('c') expect_gdb('Breakpoint 3, breakpoint3') send_gdb('checkpoint') expect_gdb('= 3') # Resume checkpoints: each one stops at its breakpoint send_gdb("restart 1"); expect_gdb('stopped') send_gdb('c') expect_gdb('Breakpoint 2, breakpoint2') send_gdb("restart 3"); expect_gdb('stopped') send_gdb('c') expect_rr('exited normally') 
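# Restarting checkpoint 2 after the replay has already run to completion
# is deliberate: reaching the end of the trace must not invalidate any
# checkpoint taken earlier in the session.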
send_gdb("restart 2"); expect_gdb('stopped') send_gdb('c') expect_gdb('Breakpoint 3, breakpoint3') # Bare 'run' defaults to last resumed checkpoint restart_replay() expect_gdb('Breakpoint 3, breakpoint3') # Remove checkpoint 2 and try resuming it; it should fail send_gdb('delete checkpoint 2') send_gdb("restart 2"); send_gdb('c') expect_gdb('failed') ok() rr-4.1.0/src/test/explicit_checkpoints.run000066400000000000000000000000471265436462100206600ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/fadvise.c000066400000000000000000000010561265436462100155050ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { /* There's not a (simple) way to meaningfully test fadvise, * since it only provides optimization hints, so this just * checks that rr doesn't blow up when it sees one. */ posix_fadvise(-1, 0, 0, POSIX_FADV_NORMAL); syscall(SYS_fadvise64, -1, 0, 0, POSIX_FADV_NORMAL); #if defined(SYS_fadvise64_64) syscall(SYS_fadvise64_64, -1, POSIX_FADV_NORMAL, 0, 0); #endif atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/fault_in_code_page.c000066400000000000000000000040371265436462100176550ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" typedef int (*fn_type)(void); static fn_type fn = NULL; static const uint8_t fn_insns[] = { 0xb8, 0x2a, 0x00, 0x00, 0x00, /* movl $42, %eax */ 0xc3, /* ret */ }; static uint8_t* code_page; static size_t page_size; static int fault_count; static void fault_in_code_page(int sig, siginfo_t* si, void* context) { atomic_printf("FAULT: signal %d: code %d for addr %p\n", sig, si->si_code, si->si_addr); test_assert(SIGSEGV == sig); test_assert(SEGV_ACCERR == si->si_code); test_assert(code_page == si->si_addr); test_assert(1 == ++fault_count); atomic_puts(" populating page..."); test_assert(0 == mprotect(code_page, page_size, PROT_READ | PROT_WRITE)); test_assert(sizeof(fn_insns) < page_size); memcpy(code_page, fn_insns, sizeof(fn_insns)); test_assert(0 == mprotect(code_page, page_size, PROT_READ | PROT_EXEC)); atomic_puts(" ... and protected it. 
sigreturn'ing"); } static uint64_t sigsegv_blocked_rdtsc(void) { sigset_t s, old; sigemptyset(&s); sigaddset(&s, SIGSEGV); sigprocmask(SIG_BLOCK, &s, &old); uint64_t tsc = rdtsc(); sys_gettid(); sigprocmask(SIG_SETMASK, &old, NULL); return tsc; } int main(int argc, char* argv[]) { struct sigaction act; page_size = sysconf(_SC_PAGESIZE); act.sa_sigaction = fault_in_code_page; act.sa_flags = SA_SIGINFO; sigemptyset(&act.sa_mask); sigaction(SIGSEGV, &act, NULL); atomic_printf("current tsc: %" PRIu64 "\n", sigsegv_blocked_rdtsc()); atomic_printf(" and now: %" PRIu64 "\n", sigsegv_blocked_rdtsc()); code_page = mmap(NULL, page_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); atomic_printf("(%d) mapped code page to %p\n", errno, code_page); test_assert(code_page != (void*)-1); fn = (fn_type)code_page; atomic_printf("calling fn(), faulting ...\n"); int ret = fn(); atomic_printf("fn() returned %d\n", ret); test_assert(42 == ret); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/fcntl_dupfd.c000066400000000000000000000004571265436462100163600ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int fd; fd = fcntl(1, F_DUPFD, 3); test_assert(fd >= 3); close(1); fd = dup2(fd, 1); test_assert(fd == 1); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/fcntl_owner_ex.c000066400000000000000000000014461265436462100171030ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void dump_owner(const char* tag, int fd) { struct f_owner_ex own; memset(&own, 0, sizeof(own)); test_assert(0 == fcntl(fd, F_GETOWN_EX, &own)); atomic_printf("%s: { type: %d, pid: %d }\n", tag, own.type, own.pid); } int main(int argc, char* argv[]) { int sockfds[2]; int fd; struct f_owner_ex own; test_assert(0 == socketpair(AF_LOCAL, SOCK_STREAM, 0, sockfds)); fd = sockfds[0]; /* doesn't matter */ test_assert(0 == fcntl(fd, F_SETFL, O_ASYNC)); dump_owner("initially", fd); own.type = F_OWNER_TID; own.pid = getpid(); test_assert(0 == fcntl(fd, F_SETOWN_EX, &own)); dump_owner("after SETOWN_EX(TID, self)", fd); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/fcntl_seals.c000066400000000000000000000017641265436462100163670ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define TEST_MEMFD "foo" #ifndef MFD_CLOEXEC #define MFD_CLOEXEC 0x0001 #define MFD_ALLOW_SEALING 0x0002 #endif #ifndef F_ADD_SEALS #define F_ADD_SEALS 0x409 #endif #ifndef F_SEAL_SEAL #define F_SEAL_SEAL 0x0001 #define F_SEAL_SHRINK 0x0002 #define F_SEAL_GROW 0x0004 #define F_SEAL_WRITE 0x0008 #endif int main(int argc, char* argv[]) { int fd; /* There's no libc helper for this syscall. 
*/ fd = syscall(RR_memfd_create, TEST_MEMFD, MFD_ALLOW_SEALING); if (-1 == fd && ENOSYS == errno) { atomic_puts("SYS_memfd_create not supported on this kernel"); } else { test_assert(fd >= 0); test_assert( fcntl(fd, F_ADD_SEALS, F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW) == 0); /* Seal after F_SEAL_SEAL should fail */ test_assert(fcntl(fd, F_ADD_SEALS, F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW) == -1); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/fcntl_sig.c000066400000000000000000000004301265436462100160270ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { test_assert(0 == fcntl(1, F_SETSIG, SIGCHLD)); test_assert(SIGCHLD == fcntl(1, F_GETSIG, 0)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/fd_tracking_across_threads.c000066400000000000000000000006221265436462100214210ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void* start_thread(void* p) { test_assert(11 == dup2(STDOUT_FILENO, 11)); test_assert(14 == write(11, "EXIT-SUCCESS\n", 14)); return NULL; } int main(int argc, char** argv) { pthread_t thread; pthread_create(&thread, NULL, start_thread, NULL); pthread_exit(NULL); return 0; } rr-4.1.0/src/test/fds_clean.c000066400000000000000000000004671265436462100160070ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char** argv) { int fd; for (fd = 3; fd < 100; ++fd) { /* Check that |fd| is available to us. */ test_assert(dup2(2, fd) == fd); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/final_sigkill.py000066400000000000000000000003371265436462100171020ustar00rootroot00000000000000from rrutil import * import re send_gdb('handle SIGKILL stop') send_gdb('c') expect_gdb('received signal SIGKILL') restart_replay() expect_gdb('received signal SIGKILL') send_gdb('c') expect_gdb('exited normally') ok() rr-4.1.0/src/test/final_sigkill.run000066400000000000000000000001061265436462100172500ustar00rootroot00000000000000source `dirname $0`/util.sh record simple$bitness debug final_sigkill rr-4.1.0/src/test/first_instruction.py000066400000000000000000000001151265436462100200550ustar00rootroot00000000000000from rrutil import * send_gdb('disass') expect_gdb('function _start') ok() rr-4.1.0/src/test/first_instruction.run000066400000000000000000000001151265436462100202310ustar00rootroot00000000000000source `dirname $0`/util.sh record exec_stub$bitness debug first_instruction rr-4.1.0/src/test/flock.c000066400000000000000000000070211265436462100151600ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define FILENAME "foo.txt" int main(int argc, char* argv[]) { ssize_t pagesize = sysconf(_SC_PAGESIZE); int fd; int i; int err; pid_t parent_pid = getpid(); pid_t pid; int status; fd = open(FILENAME, O_CREAT | O_EXCL | O_RDWR, 0600); test_assert(fd >= 0); unlink(FILENAME); atomic_printf("parent pid is %d\n", parent_pid); /* Write a page's worth of data. */ for (i = 0; i < pagesize / sizeof(i); ++i) { ssize_t nwritten = write(fd, &i, sizeof(i)); test_assert(nwritten == sizeof(i)); } { struct flock lock = {.l_type = F_RDLCK, .l_whence = SEEK_SET, .l_start = pagesize, .l_len = -pagesize / 2 }; atomic_printf("sizeof(flock) = %zu\n", sizeof(lock)); /* It should currently be unlocked. 
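 * F_GETLK never acquires a lock: the kernel rewrites l_type to F_UNLCK
 * if the probe would succeed, or else fills the struct with the
 * details (including l_pid) of a conflicting lock.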
*/ err = fcntl(fd, F_GETLK, &lock); test_assert(0 == err); atomic_printf("before lock: type: %d, pid: %d\n", lock.l_type, lock.l_pid); test_assert(F_UNLCK == lock.l_type); lock.l_type = F_RDLCK; err = fcntl(fd, F_SETLK, &lock); test_assert(0 == err); /* Make sure our lock "took". */ if (0 == (pid = fork())) { lock.l_type = F_WRLCK; err = fcntl(fd, F_GETLK, &lock); test_assert(0 == err); atomic_printf(" after lock: type: %d, pid: %d\n", lock.l_type, lock.l_pid); test_assert(F_RDLCK == lock.l_type && pagesize / 2 == lock.l_start && pagesize / 2 == lock.l_len && parent_pid == lock.l_pid); exit(0); } waitpid(pid, &status, 0); test_assert(WIFEXITED(status) && 0 == WEXITSTATUS(status)); } { struct flock64 lock = { .l_type = F_WRLCK, .l_whence = SEEK_SET, .l_start = 0, .l_len = pagesize }; atomic_printf("sizeof(flock64) = %zu\n", sizeof(lock)); /* We should be able to take a write lock on the whole * file. The kernel will upgrade the readlock. */ err = fcntl(fd, F_GETLK64, &lock); test_assert(0 == err); atomic_printf("before lock: type: %d, pid: %d\n", lock.l_type, lock.l_pid); test_assert(F_UNLCK == lock.l_type); lock.l_type = F_WRLCK; err = fcntl(fd, F_SETLK64, &lock); test_assert(0 == err); /* Make sure our lock "took". */ if (0 == (pid = fork())) { lock.l_type = F_RDLCK; err = fcntl(fd, F_GETLK64, &lock); test_assert(0 == err); atomic_printf(" after GETLK: type: %d, pid: %d\n", lock.l_type, lock.l_pid); test_assert(F_WRLCK == lock.l_type && 0 == lock.l_start && pagesize == lock.l_len && parent_pid == lock.l_pid); lock.l_type = F_RDLCK; lock.l_pid = 0; err = fcntl(fd, F_SETLKW64, &lock); test_assert(0 == err); atomic_printf(" after SETLKW: type: %d, pid: %d\n", lock.l_type, lock.l_pid); test_assert(F_RDLCK == lock.l_type && 0 == lock.l_start && pagesize == lock.l_len && 0 == lock.l_pid); atomic_puts(" releasing lock ..."); lock.l_type = F_UNLCK; err = fcntl(fd, F_SETLK64, &lock); test_assert(0 == err); return 0; } atomic_puts("P: forcing child to block on LK, sleeping ..."); usleep(500000); atomic_puts("P: ... awake, releasing lock"); lock.l_type = F_UNLCK; err = fcntl(fd, F_SETLK64, &lock); test_assert(0 == err); waitpid(pid, &status, 0); test_assert(WIFEXITED(status) && 0 == WEXITSTATUS(status)); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/flock2.c000066400000000000000000000012711265436462100152430ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ /* Test for the 'flock' system call.
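 * Unlike the byte-range fcntl() locks exercised in flock.c above,
 * flock() locks are advisory and cover the whole open file description;
 * the LOCK_SH -> LOCK_EX sequence below relies on the kernel converting
 * the existing lock rather than requiring an unlock in between.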
*/ #include "rrutil.h" #define FILENAME "foo.txt" int main(int argc, char* argv[]) { int fd; int result; fd = open(FILENAME, O_CREAT | O_EXCL | O_RDWR, 0600); test_assert(fd >= 0); result = flock(fd, LOCK_SH); test_assert(result == 0); result = flock(fd, LOCK_EX); test_assert(result == 0); result = flock(fd, LOCK_UN); test_assert(result == 0); result = close(fd); test_assert(result == 0); result = flock(fd, LOCK_EX); test_assert(result < 0); test_assert(errno == EBADF); unlink(FILENAME); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/fork_brk.c000066400000000000000000000005671265436462100156710ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t child = fork(); int status; if (!child) { sbrk(100000); return 77; } test_assert(child == wait(&status)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 77); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/fork_child_crash.c000066400000000000000000000012551265436462100173510ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) { int break_here = 1; (void)break_here; } int main(int argc, char* argv[]) { pid_t child = fork(); int status; if (0 == child) { atomic_printf("child %d\n", getpid()); breakpoint(); atomic_puts("subprocess: crashing ..."); *(volatile int*)NULL = 0; exit(0); /* not reached */ } test_assert(child == waitpid(child, &status, 0)); atomic_printf("parent: subprocess %d exited with %#x\n", child, status); test_assert(WIFSIGNALED(status) && SIGSEGV == WTERMSIG(status)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/fork_exec_info_thr.run000066400000000000000000000004401265436462100202770ustar00rootroot00000000000000source `dirname $0`/util.sh save_exe barrier$bitness saved_barrier="barrier$bitness-$nonce" record target_process$bitness "$saved_barrier" TARGET_PID=$(grep 'child ' record.out | awk '{print $2}') echo Targeting recorded pid $TARGET_PID ... 
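# (The quoted flags are forwarded to "rr replay": "-p PID" attaches the
#  debug server to that recorded process, and "-g 1" runs replay forward
#  to event 1 before handing control to gdb.)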
debug get_thread_list "-p $TARGET_PID -g 1" rr-4.1.0/src/test/fork_stress.c000066400000000000000000000006451265436462100164330ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define NUM_ITERATIONS 250 int main(int argc, char* argv[]) { int i; for (i = 0; i < NUM_ITERATIONS; ++i) { pid_t child = fork(); if (0 == child) { return 0; } if (0 > child) { atomic_printf("Fork failed with errno %d\n", errno); } } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/fork_syscalls.c000066400000000000000000000010231265436462100167340ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void syscalls(int num) { struct timespec ts; struct timeval tv; int i; for (i = 0; i < num; ++i) { clock_gettime(CLOCK_MONOTONIC, &ts); gettimeofday(&tv, NULL); } } int main(void) { int child; syscalls(10); if (0 == (child = fork())) { syscalls(10); atomic_printf("CHILD-EXIT "); exit(0); } syscalls(10); waitpid(child, NULL, 0); atomic_puts("PARENT-EXIT"); return 0; } rr-4.1.0/src/test/fork_syscalls.run000066400000000000000000000001021265436462100173130ustar00rootroot00000000000000source `dirname $0`/util.sh compare_test "CHILD-EXIT PARENT-EXIT" rr-4.1.0/src/test/function_calls.c000066400000000000000000000011041265436462100170610ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static volatile int result = 0; static void funcallD(int n) { result = n; } static void funcallC(int n) { funcallD(n); funcallD(n); } static void funcallB(int n) { funcallC(n); funcallC(n); } static void funcallA(int n) { funcallB(n); funcallB(n); } static void funcall(int n) { funcallA(n); funcallA(n); } int main(int argc, char* argv[]) { funcall(1); funcall(1); atomic_printf("result=%d\n", result); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/function_calls.py000066400000000000000000000003361265436462100172750ustar00rootroot00000000000000from rrutil import * send_gdb('break main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('n'); send_gdb('n'); send_gdb('reverse-next'); send_gdb('c'); expect_gdb('exited normally') ok() rr-4.1.0/src/test/function_calls.run000066400000000000000000000000471265436462100174500ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/fxregs.c000066400000000000000000000034651265436462100153700ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) { int break_here = 1; (void)break_here; } static const double st0 = 1; static const double st1 = 2; static const double st2 = 3; static const double st3 = 4; static const double st4 = 5; static const double st5 = 6; static const double st6 = 7; static const double st7 = 8; static const float xmm0 = 10; static const float xmm1 = 11; static const float xmm2 = 12; static const float xmm3 = 13; static const float xmm4 = 14; static const float xmm5 = 15; static const float xmm6 = 16; static const float xmm7 = 17; int main(int argc, char* argv[]) { __asm__ __volatile__( /* Push the constants in stack order so they look as * we expect in gdb. 
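 * (fld pushes onto the x87 register stack, so st7's constant is loaded
 * first and st0's last, leaving $st0 == 1, $st1 == 2, ... $st7 == 8 --
 * the ordering that fxregs.py asserts.)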
*/ #if __i386__ "fldl st7\n\t" "fldl st6\n\t" "fldl st5\n\t" "fldl st4\n\t" "fldl st3\n\t" "fldl st2\n\t" "fldl st1\n\t" "fldl st0\n\t" "movss xmm0, %xmm0\n\t" "movss xmm1, %xmm1\n\t" "movss xmm2, %xmm2\n\t" "movss xmm3, %xmm3\n\t" "movss xmm4, %xmm4\n\t" "movss xmm5, %xmm5\n\t" "movss xmm6, %xmm6\n\t" "movss xmm7, %xmm7\n\t" #elif __x86_64__ "fldl st7(%rip)\n\t" "fldl st6(%rip)\n\t" "fldl st5(%rip)\n\t" "fldl st4(%rip)\n\t" "fldl st3(%rip)\n\t" "fldl st2(%rip)\n\t" "fldl st1(%rip)\n\t" "fldl st0(%rip)\n\t" "movss xmm0(%rip), %xmm0\n\t" "movss xmm1(%rip), %xmm1\n\t" "movss xmm2(%rip), %xmm2\n\t" "movss xmm3(%rip), %xmm3\n\t" "movss xmm4(%rip), %xmm4\n\t" "movss xmm5(%rip), %xmm5\n\t" "movss xmm6(%rip), %xmm6\n\t" "movss xmm7(%rip), %xmm7\n\t" #else #error unexpected architecture #endif ); breakpoint(); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/fxregs.py000066400000000000000000000006431265436462100155710ustar00rootroot00000000000000from rrutil import * send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1, breakpoint') # See fxregs.c for the list of constants that are loaded into the # $st0-$st7 and $xmm0-$xmm7 registers. for i in xrange(8): send_gdb('p $st%d'% (i)) expect_gdb(' = %d'% (i + 1)) for i in xrange(8): send_gdb('p $xmm%d.v4_float[0]'% (i)) expect_gdb(' = %d'% (i + 10)) ok() rr-4.1.0/src/test/fxregs.run000066400000000000000000000000631265436462100157410ustar00rootroot00000000000000source `dirname $0`/util.sh fxregs "$@" debug_test rr-4.1.0/src/test/generic_break.py000066400000000000000000000002061265436462100170460ustar00rootroot00000000000000from rrutil import * send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1, breakpoint') ok() rr-4.1.0/src/test/get_thread_list.py000066400000000000000000000023101265436462100174250ustar00rootroot00000000000000from rrutil import * import re NUM_THREADS = 10 send_gdb('b hit_barrier') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1, hit_barrier') arch = get_exe_arch() # The locations the threads are stopped at depends on the architecture. stopped_locations = { # on i386, we sometimes stop in the middle of nowhere 'i386': ['(0x[0-9a-f]+ in )?__kernel_vsyscall', '(0x[0-9a-f]+ in )?_traced_raw_syscall', '0x[0-9a-f]+ in \?\?', '(0x[0-9a-f]+ in )?__lll_lock_wait', '(0x[0-9a-f]+ in )?pthread_barrier_wait'], 'i386:x86-64': ['(0x[0-9a-f]+ in )?__lll_lock_wait', '(0x[0-9a-f]+ in )?pthread_barrier_wait', '0x70000010 in \?\?'], } send_gdb('info threads') for i in xrange(NUM_THREADS + 1, 1, -1): # The threads are at the kernel syscall entry, or either the # traced/untraced entry reached through the rr monkeypatched one. # Rarely, non-main threads have been observed to be reordered (i.e. gdb # did not number them in order of creation). This does not seem to be a bug # so tolerate it. 
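# (Matching is deliberately loose: any of the stopped_locations
# alternatives for this arch may appear, depending on how far through
# the syscall-entry path each thread had gotten when it stopped.)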
expect_gdb(r'%d\s+Thread[^(]+\(BP-THREAD-[0-9]+\) (?:%s) \(\)'% (i, '|'.join(stopped_locations[arch]))) expect_gdb(r'1\s+Thread[^h]+hit_barrier \(\)') ok() rr-4.1.0/src/test/get_thread_list.run000066400000000000000000000001111265436462100175760ustar00rootroot00000000000000source `dirname $0`/util.sh record barrier$bitness debug get_thread_list rr-4.1.0/src/test/getcwd.c000066400000000000000000000021761265436462100153450ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { ssize_t pagesize = sysconf(_SC_PAGESIZE); ssize_t two_pages_size = 2 * pagesize; void* two_pages = mmap(NULL, two_pages_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); void* two_pages_end = two_pages + two_pages_size; char* cwd; char* expected_cwd; test_assert(argc == 2); expected_cwd = argv[1]; test_assert(two_pages != (void*)-1); /* Make the value returned into |path| overlap two physical * pages. */ cwd = two_pages + pagesize - 3; /* Fill pages with non-zeroes to ensure the returned string is * properly null-terminated */ memset(two_pages, 0xFF, two_pages_size); test_assert(cwd == getcwd(cwd, two_pages_end - (void*)cwd)); atomic_printf("current working directory is %s; should be %s\n", cwd, expected_cwd); test_assert(!strcmp(cwd, expected_cwd)); /* Make sure we didn't write too many bytes */ test_assert((unsigned char)cwd[strlen(cwd) + 1] == 0xFF); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/getcwd.run000066400000000000000000000001171265436462100157200ustar00rootroot00000000000000source `dirname $0`/util.sh record $TESTNAME "$PWD" replay check EXIT-SUCCESS rr-4.1.0/src/test/getgroups.c000066400000000000000000000006701265436462100161040ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { gid_t groups[1024]; int num_groups = getgroups(ALEN(groups), groups); int i; atomic_printf("User %d belongs to %d groups:\n ", geteuid(), num_groups); for (i = 0; i < num_groups; ++i) { atomic_printf("%d,", groups[i]); } atomic_puts(""); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/getrandom.c000066400000000000000000000014421265436462100160430ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #ifndef GRND_NONBLOCK #define GRND_NONBLOCK 0x0001 #define GRND_RANDOM 0x0002 #endif int main(int argc, char* argv[]) { char buf[128]; int ret; memset(buf, 0, sizeof(buf)); /* There's no libc helper for this syscall. 
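 * (getrandom(2) only grew a glibc wrapper in 2.25; with one available,
 * the equivalent call would read getrandom(buf, sizeof(buf),
 * GRND_NONBLOCK). Going through syscall(2) keeps the test independent
 * of libc age.)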
*/ ret = syscall(RR_getrandom, buf, sizeof(buf), GRND_NONBLOCK); if (-1 == ret && ENOSYS == errno) { atomic_puts("SYS_getrandom not supported on this kernel"); } else { uint i; test_assert(sizeof(buf) == ret); atomic_printf( "fetched %d random bytes (non-blockingly); first few bytes:\n ", ret); for (i = 0; i < 10; ++i) { atomic_printf("%02x", buf[i]); } atomic_puts(""); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/getsid.c000066400000000000000000000004341265436462100153420ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t sid = getsid(0); atomic_printf("getsid(0) session ID: %d\n", sid); test_assert(sid > 0); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/gettimeofday.c000066400000000000000000000007101265436462100165410ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { struct timeval* tv; struct timezone* tz; ALLOCATE_GUARD(tv, 0); ALLOCATE_GUARD(tz, 'x'); test_assert(0 == gettimeofday(tv, tz)); test_assert(tv->tv_sec > 0); test_assert(tz->tz_dsttime == 0); /* always zero on Linux */ VERIFY_GUARD(tv); VERIFY_GUARD(tz); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/goto_event.c000066400000000000000000000024611265436462100162360ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void first_breakpoint(void) { int break_here = 1; (void)break_here; } static void second_breakpoint(void) { int break_here = 1; (void)break_here; } static void* child_thread(void* num_syscallsp) { int num_syscalls = (uintptr_t)num_syscallsp; int i; first_breakpoint(); /* NB: this test assumes that geteuid() produces at least one * trace event per syscall. */ atomic_printf("%d: running %d syscalls ...\n", getpid(), num_syscalls); for (i = 0; i < num_syscalls; ++i) { geteuid(); } second_breakpoint(); return NULL; } static void child(int num_syscalls) { pthread_t t; test_assert(0 == pthread_create(&t, NULL, child_thread, (void*)(uintptr_t)num_syscalls)); pthread_join(t, NULL); exit(0); } int main(int argc, char** argv) { int num_syscalls; pid_t c; int status; test_assert(argc == 2); num_syscalls = atoi(argv[1]); if (0 == (c = fork())) { child(num_syscalls); test_assert("Not reached" && 0); } atomic_printf("%d: waiting on %d ...\n", getpid(), c); test_assert(c == waitpid(c, &status, 0)); test_assert(WIFEXITED(status) && 0 == WEXITSTATUS(status)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/goto_event.py000066400000000000000000000005511265436462100164420ustar00rootroot00000000000000from rrutil import * send_gdb('b first_breakpoint') expect_gdb('Breakpoint 1') send_gdb('b second_breakpoint') expect_gdb('Breakpoint 2') send_gdb('c') # If we hit first_breakpoint, then we never continue and never reach # second_breakpoint. 
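# (goto_event.run starts this session with "-g $EVENTS", so replay
# begins at the target event, already past first_breakpoint; the
# restart_replay(1) below rewinds to event 1, where Breakpoint 1 is
# reachable again.)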
expect_gdb('Breakpoint 2, second_breakpoint') restart_replay(1) expect_gdb('Breakpoint 1, first_breakpoint') ok() rr-4.1.0/src/test/goto_event.run000066400000000000000000000001411265436462100166110ustar00rootroot00000000000000source `dirname $0`/util.sh EVENTS=1000 record $TESTNAME $EVENTS debug goto_event "-g $EVENTS" rr-4.1.0/src/test/grandchild_threads.c000066400000000000000000000011111265436462100176650ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void* start_thread(void* p) { sleep(1000); return NULL; } int main(int argc, char** argv) { pid_t child; pthread_t thread; int pipe_fds[2]; char ch; pipe(pipe_fds); child = fork(); if (child > 0) { read(pipe_fds[0], &ch, 1); kill(child, 9); /* try to exit before the child's exit */ return 0; } pthread_create(&thread, NULL, start_thread, NULL); atomic_puts("EXIT-SUCCESS"); write(pipe_fds[1], &ch, 1); sleep(1000); return 0; } rr-4.1.0/src/test/grandchild_threads_main_running.c000066400000000000000000000011521265436462100224360ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int pipe_fds[2]; static void* start_thread(void* p) { char ch; sleep(1); atomic_puts("EXIT-SUCCESS"); write(pipe_fds[1], &ch, 1); sleep(1000); return NULL; } int main(int argc, char** argv) { pid_t child; pthread_t thread; char ch; pipe(pipe_fds); child = fork(); if (child > 0) { read(pipe_fds[0], &ch, 1); kill(child, 9); /* try to exit before the child's exit */ return 0; } pthread_create(&thread, NULL, start_thread, NULL); while (1) { } return 0; } rr-4.1.0/src/test/grandchild_threads_parent_alive.c000066400000000000000000000011541265436462100224250ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void* start_thread(void* p) { sleep(1000); return NULL; } int main(int argc, char** argv) { pid_t child; pthread_t thread; int pipe_fds[2]; char ch; pipe(pipe_fds); child = fork(); if (child > 0) { read(pipe_fds[0], &ch, 1); kill(child, 9); /* wait for the child to exit before we exit */ waitpid(child, NULL, 0); return 0; } pthread_create(&thread, NULL, start_thread, NULL); atomic_puts("EXIT-SUCCESS"); write(pipe_fds[1], &ch, 1); sleep(1000); return 0; } rr-4.1.0/src/test/grandchild_threads_thread_running.c000066400000000000000000000011301265436462100227550ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void* start_thread(void* p) { while (1) { } return NULL; } int main(int argc, char** argv) { pid_t child; pthread_t thread; int pipe_fds[2]; char ch; pipe(pipe_fds); child = fork(); if (child > 0) { read(pipe_fds[0], &ch, 1); kill(child, 9); /* try to exit before the child's exit */ return 0; } pthread_create(&thread, NULL, start_thread, NULL); sleep(1); atomic_puts("EXIT-SUCCESS"); write(pipe_fds[1], &ch, 1); sleep(1000); return 0; } rr-4.1.0/src/test/hardlink_mmapped_files.run000066400000000000000000000003131265436462100211220ustar00rootroot00000000000000source `dirname $0`/util.sh cp $OBJDIR/lib/libtest_lib$bitness.so . 
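# (Recording preloads our local copy of the library; it is then deleted
#  before replay, so replay must be served from rr's saved copy of the
#  mapped file rather than from the filesystem.)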
RECORD_ARGS="--env=LD_PRELOAD=libtest_lib$bitness.so" record constructor$bitness rm libtest_lib$bitness.so replay check EXIT-SUCCESS rr-4.1.0/src/test/hello.c000066400000000000000000000002311265436462100151610ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(void) { atomic_puts("Hi"); return 0; } rr-4.1.0/src/test/hello.run000066400000000000000000000000561265436462100155500ustar00rootroot00000000000000source `dirname $0`/util.sh compare_test 'Hi' rr-4.1.0/src/test/ignored_async_usr1.c000066400000000000000000000010231265436462100176540ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int dummy = 0, i; /* NB: since we're masking out the signal, there's no way for * us to tell whether or not it was actually delivered. This * test can spuriously pass if it's never sent SIGUSR1. */ signal(SIGUSR1, SIG_IGN); atomic_puts("SIGUSR1 disabled"); for (i = 1; i < (1 << 27); ++i) { dummy += (dummy + i) % 9735; } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/ignored_async_usr1.run000066400000000000000000000007121265436462100202420ustar00rootroot00000000000000source `dirname $0`/util.sh SYNC_TOKEN=disabled record $TESTNAME & echo "Waiting for token '$SYNC_TOKEN' from tracee ..." until grep -q $SYNC_TOKEN record.out; do sleep 0 done echo " done. Delivering SIGUSR1 ..." kill -USR1 $rrpid $(pidof $TESTNAME-$nonce) # Wait for 'record' to actually terminate. Otherwise we might start # replaying before the trace file has been completely written. wait echo "Replaying ..." replay check 'EXIT-SUCCESS' rr-4.1.0/src/test/ignored_sigsegv.c000066400000000000000000000007731265436462100172470ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void* start_thread(void* p) { atomic_puts("EXIT-SUCCESS"); *(int*)NULL = 0; return NULL; } int main(int argc, char* argv[]) { struct sigaction act; pthread_t thread; act.sa_handler = SIG_IGN; act.sa_flags = SA_NODEFER; sigemptyset(&act.sa_mask); sigaction(SIGSEGV, &act, NULL); pthread_create(&thread, NULL, start_thread, NULL); pthread_join(thread, NULL); return 0; } rr-4.1.0/src/test/immediate_restart.c000066400000000000000000000006071265436462100175670ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char** argv) { int child; atomic_printf("%d: forking...\n", getpid()); if (0 == (child = fork())) { atomic_puts("EXIT-SUCCESS"); return 0; } atomic_printf("child %d\n", child); waitpid(child, NULL, 0); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/immediate_restart.py000066400000000000000000000003521265436462100177720ustar00rootroot00000000000000from rrutil import * restart_replay() # A single EXIT-SUCCESS is expected since the child process to which we have # attached only prints one, and it exits before the parent prints its # EXIT-SUCCESS. expect_rr('EXIT-SUCCESS') ok() rr-4.1.0/src/test/immediate_restart.run000066400000000000000000000002761265436462100201530ustar00rootroot00000000000000source `dirname $0`/util.sh record $TESTNAME TARGET_PID=$(grep 'child ' record.out | awk '{print $2}') echo Targeting recorded pid $TARGET_PID ... 
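# ("-f PID" asks rr to start the debug server as soon as that recorded
#  pid has been forked, before it has executed anything.)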
debug immediate_restart "-f $TARGET_PID" rr-4.1.0/src/test/int3.c000066400000000000000000000010041265436462100147320ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ static void breakpoint(void) { __asm__("int $3"); /* NB: the above instruction *must* be at line 3 in this file. * Tests rely on that. */ } #include "rrutil.h" static void handle_sigtrap(int sig) { atomic_puts("EXIT-SUCCESS"); _exit(0); } int main(int argc, char* argv[]) { signal(SIGTRAP, handle_sigtrap); atomic_puts("raising SIGTRAP ..."); breakpoint(); test_assert("didn't catch trap!" && 0); return 0; } rr-4.1.0/src/test/interrupt.c000066400000000000000000000006621265436462100161220ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" void spin(void) { int i; atomic_puts("spinning"); for (i = 1; i < (1 << 30); ++i) { if (0 == i % (1 << 20)) { write(STDOUT_FILENO, ".", 1); } if (0 == i % (79 * (1 << 20))) { write(STDOUT_FILENO, "\n", 1); } } } int main(int argc, char* argv[]) { spin(); atomic_puts("done"); return 0; } rr-4.1.0/src/test/interrupt.py000066400000000000000000000003241265436462100163230ustar00rootroot00000000000000from rrutil import * # XXX this test is racy, because we don't have a way to halt replay # until some condition is satisfied. Maybe we should add that. send_gdb('c') expect_rr('spinning') interrupt_gdb() ok() rr-4.1.0/src/test/interrupt.run000066400000000000000000000000471265436462100165010ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/intr_futex_wait_restart.c000066400000000000000000000050501265436462100210410ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static const char start_token = '!'; static const char sentinel_token = ' '; static pthread_t reader; static pthread_barrier_t barrier; static pid_t reader_tid; static int reader_caught_signal; static int sockfds[2]; pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t cond = PTHREAD_COND_INITIALIZER; static void cond_wait(int secs) { struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); ts.tv_sec += secs; test_assert(ETIMEDOUT == pthread_cond_timedwait(&cond, &lock, &ts)); } static void sighandler(int sig) { test_assert(sys_gettid() == reader_tid); ++reader_caught_signal; atomic_puts("r: in sighandler level 1 ..."); cond_wait(1); atomic_puts("r: ... wait done"); } static void* reader_thread(void* dontcare) { char token = start_token; struct sigaction act; int readsock = sockfds[1]; char c = sentinel_token; int flags = 0; pthread_mutex_lock(&lock); reader_tid = sys_gettid(); flags = SA_RESTART; act.sa_handler = sighandler; sigemptyset(&act.sa_mask); act.sa_flags = flags; sigaction(SIGUSR1, &act, NULL); act.sa_handler = SIG_IGN; sigemptyset(&act.sa_mask); act.sa_flags = flags; sigaction(SIGUSR2, &act, NULL); pthread_barrier_wait(&barrier); atomic_puts("r: blocking on read, awaiting signal ..."); test_assert(1 == read(readsock, &c, sizeof(c))); test_assert(1 == reader_caught_signal); atomic_printf("r: ... read level 0 '%c'\n", c); test_assert(c == token); return NULL; } int main(int argc, char* argv[]) { char token = start_token; struct timeval ts; /* (Kick on the syscallbuf if it's enabled.) 
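 * An early buffered syscall like the gettimeofday() below lets the
 * syscall-buffering machinery finish setting itself up before the
 * signal/read races this test actually cares about begin.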
*/ gettimeofday(&ts, NULL); socketpair(AF_LOCAL, SOCK_STREAM, 0, sockfds); pthread_barrier_init(&barrier, NULL, 2); pthread_create(&reader, NULL, reader_thread, NULL); pthread_barrier_wait(&barrier); /* Force a blocked read() that's interrupted by a SIGUSR1, * which then itself blocks on read() and succeeds. */ atomic_puts("M: sleeping ..."); usleep(500000); atomic_puts("M: killing reader ..."); pthread_kill(reader, SIGUSR1); atomic_puts("M: (quick nap)"); usleep(100000); atomic_puts("M: killing reader again ..."); pthread_kill(reader, SIGUSR2); usleep(500000); atomic_printf("M: finishing level 0 reader by writing '%c' to socket ...\n", token); write(sockfds[0], &token, sizeof(token)); ++token; atomic_puts("M: ... done"); pthread_join(reader, NULL); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/intr_poll.c000066400000000000000000000020221265436462100160600ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int pipefds[2]; static int poll_pipe(int timeout_ms) { struct pollfd pfd; int ret; pfd.fd = pipefds[0]; pfd.events = POLLIN; errno = 0; ret = poll(&pfd, 1, timeout_ms); /* Verify that our input fields were not trashed */ test_assert(pfd.fd == pipefds[0]); test_assert(pfd.events == POLLIN); return ret; } static int caught_signal; static void handle_signal(int sig) { ++caught_signal; } int main(int argc, char* argv[]) { struct timespec dummy; test_assert(0 == pipe(pipefds)); signal(SIGALRM, SIG_IGN); alarm(1); atomic_puts("ignoring SIGALRM, going into poll ..."); test_assert(0 == poll_pipe(1500) && 0 == errno); signal(SIGALRM, handle_signal); alarm(1); atomic_puts("handling SIGALRM, going into poll ..."); clock_gettime(CLOCK_MONOTONIC, &dummy); test_assert(-1 == poll_pipe(-1) && EINTR == errno); test_assert(1 == caught_signal); atomic_puts("EXIT-SUCCESS"); return 1; } rr-4.1.0/src/test/intr_pselect.c000066400000000000000000000017171265436462100165630ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int pipefds[2]; static int pselect_pipe(int timeout) { fd_set set; FD_ZERO(&set); FD_SET(pipefds[0], &set); struct timespec t; t.tv_sec = timeout; t.tv_nsec = 0; sigset_t sigmask; sigemptyset(&sigmask); errno = 0; return pselect(pipefds[0] + 1, &set, NULL, NULL, timeout ? 
&t : NULL, &sigmask); } static int caught_signal; static void handle_signal(int sig) { ++caught_signal; } int main(int argc, char* argv[]) { pipe(pipefds); signal(SIGALRM, SIG_IGN); alarm(1); atomic_puts("ignoring SIGALRM, going into pselect ..."); test_assert(0 == pselect_pipe(2) && 0 == errno); signal(SIGALRM, handle_signal); alarm(1); atomic_puts("handling SIGALRM, going into pselect ..."); test_assert(-1 == pselect_pipe(0) && EINTR == errno); test_assert(1 == caught_signal); atomic_puts("EXIT-SUCCESS"); return 1; } rr-4.1.0/src/test/intr_ptrace_decline.c000066400000000000000000000055721265436462100200700ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static const char start_token = '!'; static const char sentinel_token = ' '; static pthread_t reader; static pthread_barrier_t barrier; static int sockfds[2]; pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t cond = PTHREAD_COND_INITIALIZER; static void fin_intr_sleep(int secs) { struct timespec req = {.tv_sec = secs }; struct timespec rem = {.tv_sec = -1, .tv_nsec = -1 }; test_assert(0 == nanosleep(&req, &rem)); /* We would normally assert that the outparam wasn't touched * for this successful sleep, but ptrace-declined signals are * an odd case, the only way a nanosleep can restart. The * kernel has been observed to write back the outparam at * interrupt time, so we track that semantics here. * * test_assert(-1 == rem.tv_sec && -1 == rem.tv_nsec); */ } static void fin_poll(int secs) { static int pipefds[2]; struct pollfd pfd; int ret; pipe(pipefds); pfd.fd = pipefds[0]; pfd.events = POLLIN; pfd.revents = -1; errno = 0; ret = poll(&pfd, 1, 1000 * secs); atomic_printf("r: poll() returns %d; pfd.revents = 0x%x\n", ret, pfd.revents); test_assert(0 == ret); test_assert(0 == pfd.revents); } static void cond_wait(int secs) { struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); ts.tv_sec += secs; test_assert(ETIMEDOUT == pthread_cond_timedwait(&cond, &lock, &ts)); } static void* reader_thread(void* dontcare) { char token = start_token; int readsock = sockfds[1]; char c = sentinel_token; pthread_mutex_lock(&lock); pthread_barrier_wait(&barrier); atomic_puts("r: blocking on sleep, awaiting signal ..."); fin_intr_sleep(1); atomic_puts("r: blocking on poll, awaiting signal ..."); fin_poll(1); atomic_puts("r: blocking on futex, awaiting signal ..."); cond_wait(1); atomic_puts("r: blocking on read, awaiting signal ..."); test_assert(1 == read(readsock, &c, sizeof(c))); atomic_printf("r: ... read '%c'\n", c); test_assert(c == token); return NULL; } int main(int argc, char* argv[]) { char token = start_token; struct timeval ts; int i; /* (Kick on the syscallbuf if it's enabled.) */ gettimeofday(&ts, NULL); socketpair(AF_LOCAL, SOCK_STREAM, 0, sockfds); pthread_barrier_init(&barrier, NULL, 2); pthread_create(&reader, NULL, reader_thread, NULL); pthread_barrier_wait(&barrier); atomic_puts("M: sleeping ..."); usleep(500000); for (i = 0; i < 4; ++i) { atomic_puts("M: killing reader ..."); pthread_kill(reader, SIGUSR1); atomic_puts("M: sleeping ..."); sleep(1); } atomic_printf("M: finishing original reader by writing '%c' to socket ...\n", token); write(sockfds[0], &token, sizeof(token)); ++token; atomic_puts("M: ... 
done"); pthread_join(reader, NULL); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/intr_ptrace_decline.run000066400000000000000000000001741265436462100204430ustar00rootroot00000000000000source `dirname $0`/util.sh # Ignore SIGUSR1; block its delivery to tracees. RECORD_ARGS="-i10" compare_test EXIT-SUCCESS rr-4.1.0/src/test/intr_read_no_restart.c000066400000000000000000000054421265436462100202760ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static const char start_token = '!'; static const char sentinel_token = ' '; static pthread_t reader; static pthread_barrier_t barrier; static pid_t reader_tid; static int reader_caught_signal; static int sockfds[2]; static void sighandler(int sig) { char c = sentinel_token; test_assert(sys_gettid() == reader_tid); ++reader_caught_signal; atomic_puts("r: in sighandler level 1 ..."); test_assert(-1 == read(sockfds[1], &c, sizeof(c)) && EINTR == errno); atomic_printf("r: ... read level 1 '%c'\n", c); test_assert(c == sentinel_token); } static void sighandler2(int sig) { char c = sentinel_token; test_assert(sys_gettid() == reader_tid); ++reader_caught_signal; atomic_puts("r: in sighandler level 2 ..."); test_assert(1 == read(sockfds[1], &c, sizeof(c))); atomic_printf("r: ... read level 2 '%c'\n", c); test_assert(c == start_token); } static void* reader_thread(void* dontcare) { struct sigaction act; struct timeval ts; int readsock = sockfds[1]; char c = sentinel_token; int flags = 0; reader_tid = sys_gettid(); act.sa_handler = sighandler; sigemptyset(&act.sa_mask); act.sa_flags = flags; sigaction(SIGUSR1, &act, NULL); act.sa_handler = sighandler2; sigemptyset(&act.sa_mask); act.sa_flags = flags; sigaction(SIGUSR2, &act, NULL); pthread_barrier_wait(&barrier); /* (Put another record in the syscallbuf.) */ gettimeofday(&ts, NULL); atomic_puts("r: blocking on read, awaiting signal ..."); test_assert(-1 == read(readsock, &c, sizeof(c)) && EINTR == errno); test_assert(2 == reader_caught_signal); atomic_printf("r: ... read level 0 '%c'\n", c); test_assert(c == sentinel_token); return NULL; } int main(int argc, char* argv[]) { char token = start_token; struct timeval ts; /* (Kick on the syscallbuf if it's enabled.) */ gettimeofday(&ts, NULL); socketpair(AF_LOCAL, SOCK_STREAM, 0, sockfds); pthread_barrier_init(&barrier, NULL, 2); pthread_create(&reader, NULL, reader_thread, NULL); pthread_barrier_wait(&barrier); /* Force a blocked read() that's interrupted by a SIGUSR1, * which then itself blocks on read() and succeeds. */ atomic_puts("M: sleeping ..."); usleep(500000); atomic_puts("M: killing reader ..."); pthread_kill(reader, SIGUSR1); atomic_puts("M: (quick nap)"); usleep(100000); atomic_puts("M: killing reader again ..."); pthread_kill(reader, SIGUSR2); atomic_puts("M: (longer nap)"); usleep(500000); atomic_printf("M: finishing level 2 reader by writing '%c' to socket ...\n", token); write(sockfds[0], &token, sizeof(token)); ++token; atomic_puts("M: ... 
done"); pthread_join(reader, NULL); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/intr_read_restart.c000066400000000000000000000061131265436462100175760ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static const char start_token = '!'; static const char sentinel_token = ' '; static pthread_t reader; static pthread_barrier_t barrier; static pid_t reader_tid; static int reader_caught_signal; static int sockfds[2]; static void sighandler(int sig) { char c = sentinel_token; test_assert(sys_gettid() == reader_tid); ++reader_caught_signal; atomic_puts("r: in sighandler level 1 ..."); test_assert(1 == read(sockfds[1], &c, sizeof(c))); atomic_printf("r: ... read level 1 '%c'\n", c); test_assert(c == start_token + 1); } static void sighandler2(int sig) { char c = sentinel_token; test_assert(sys_gettid() == reader_tid); ++reader_caught_signal; atomic_puts("r: in sighandler level 2 ..."); test_assert(1 == read(sockfds[1], &c, sizeof(c))); atomic_printf("r: ... read level 2 '%c'\n", c); test_assert(c == start_token); } static void* reader_thread(void* dontcare) { char token = start_token; struct sigaction act; int readsock = sockfds[1]; char c = sentinel_token; int flags = 0; reader_tid = sys_gettid(); flags = SA_RESTART; act.sa_handler = sighandler; sigemptyset(&act.sa_mask); act.sa_flags = flags; sigaction(SIGUSR1, &act, NULL); act.sa_handler = sighandler2; sigemptyset(&act.sa_mask); act.sa_flags = flags; sigaction(SIGUSR2, &act, NULL); pthread_barrier_wait(&barrier); atomic_puts("r: blocking on read, awaiting signal ..."); test_assert(1 == read(readsock, &c, sizeof(c))); test_assert(2 == reader_caught_signal); token += reader_caught_signal; atomic_printf("r: ... read level 0 '%c'\n", c); test_assert(c == token); return NULL; } int main(int argc, char* argv[]) { char token = start_token; struct timeval ts; /* (Kick on the syscallbuf if it's enabled.) */ gettimeofday(&ts, NULL); socketpair(AF_LOCAL, SOCK_STREAM, 0, sockfds); pthread_barrier_init(&barrier, NULL, 2); pthread_create(&reader, NULL, reader_thread, NULL); pthread_barrier_wait(&barrier); /* Force a blocked read() that's interrupted by a SIGUSR1, * which then itself blocks on read() and succeeds. */ atomic_puts("M: sleeping ..."); usleep(500000); atomic_puts("M: killing reader ..."); pthread_kill(reader, SIGUSR1); atomic_puts("M: (quick nap)"); usleep(100000); atomic_puts("M: killing reader again ..."); pthread_kill(reader, SIGUSR2); atomic_puts("M: (longer nap)"); usleep(500000); atomic_printf("M: finishing level 2 reader by writing '%c' to socket ...\n", token); write(sockfds[0], &token, sizeof(token)); ++token; usleep(500000); atomic_printf("M: finishing level 1 reader by writing '%c' to socket ...\n", token); write(sockfds[0], &token, sizeof(token)); ++token; usleep(500000); atomic_printf("M: finishing original reader by writing '%c' to socket ...\n", token); write(sockfds[0], &token, sizeof(token)); ++token; atomic_puts("M: ... 
done"); pthread_join(reader, NULL); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/intr_sleep.c000066400000000000000000000020041265436462100162220ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) { int break_here = 1; (void)break_here; } static int interrupted_sleep(void) { struct timespec ts = {.tv_sec = 2 }; alarm(1); errno = 0; /* The implementation of sleep() is technically allowed to use * SIGALRM, so we have to use nanosleep() for pedantry. */ nanosleep(&ts, NULL); return errno; } static int caught_signal; static void handle_signal(int sig) { ++caught_signal; breakpoint(); /* No more syscalls after here. */ } int main(int argc, char* argv[]) { int err; signal(SIGALRM, SIG_IGN); err = interrupted_sleep(); atomic_printf("No sighandler; sleep exits with errno %d\n", err); test_assert(0 == err); signal(SIGALRM, handle_signal); err = interrupted_sleep(); atomic_printf("With sighandler; sleep exits with errno %d\n", err); test_assert(1 == caught_signal); test_assert(EINTR == err); atomic_puts("EXIT-SUCCESS"); return 1; } rr-4.1.0/src/test/intr_sleep_no_restart.c000066400000000000000000000043261265436462100204730ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static pthread_t reader; static pthread_barrier_t barrier; static pid_t reader_tid; static int reader_caught_signal; static void intr_sleep(int secs) { struct timespec req = {.tv_sec = secs }; struct timespec rem = { 0 }; test_assert(-1 == nanosleep(&req, &rem) && EINTR == errno); test_assert(rem.tv_sec > 0 || rem.tv_nsec > 0); } static void fin_sleep(int secs) { struct timespec req = {.tv_sec = secs }; struct timespec rem = {.tv_sec = -1, .tv_nsec = -1 }; test_assert(0 == nanosleep(&req, &rem)); test_assert(-1 == rem.tv_sec && -1 == rem.tv_nsec); } static void sighandler(int sig) { test_assert(sys_gettid() == reader_tid); ++reader_caught_signal; atomic_puts("r: in sighandler level 1 ..."); intr_sleep(2); } static void sighandler2(int sig) { test_assert(sys_gettid() == reader_tid); ++reader_caught_signal; atomic_puts("r: in sighandler level 2 ..."); fin_sleep(1); } static void* reader_thread(void* dontcare) { struct sigaction act; int flags = 0; reader_tid = sys_gettid(); act.sa_handler = sighandler; sigemptyset(&act.sa_mask); act.sa_flags = flags; sigaction(SIGUSR1, &act, NULL); act.sa_handler = sighandler2; sigemptyset(&act.sa_mask); act.sa_flags = flags; sigaction(SIGUSR2, &act, NULL); pthread_barrier_wait(&barrier); atomic_puts("r: blocking on sleep, awaiting signal ..."); intr_sleep(3); return NULL; } int main(int argc, char* argv[]) { struct timeval ts; /* (Kick on the syscallbuf if it's enabled.) */ gettimeofday(&ts, NULL); pthread_barrier_init(&barrier, NULL, 2); pthread_create(&reader, NULL, reader_thread, NULL); pthread_barrier_wait(&barrier); /* Force a blocked read() that's interrupted by a SIGUSR1, * which then itself blocks on read() and succeeds. */ atomic_puts("M: sleeping ..."); usleep(500000); atomic_puts("M: killing reader ..."); pthread_kill(reader, SIGUSR1); atomic_puts("M: (quick nap)"); usleep(100000); atomic_puts("M: killing reader again ..."); pthread_kill(reader, SIGUSR2); atomic_puts("M: ... 
done"); pthread_join(reader, NULL); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/invalid_fcntl.c000066400000000000000000000004541265436462100167010ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { /* Do an invalid fcntl command on valid fd 0 */ test_assert(-1 == fcntl(0, 9999)); test_assert(errno == EINVAL); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/io.c000066400000000000000000000004111265436462100144650ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { char buf[32]; int garbage_fd = 1 << 30; read(garbage_fd, buf, sizeof(buf)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/ioctl.c000066400000000000000000000006741265436462100152030ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int pipe_fds[2]; test_assert(0 == pipe(pipe_fds)); test_assert(0 == ioctl(pipe_fds[0], FIOCLEX)); test_assert(FD_CLOEXEC == fcntl(pipe_fds[0], F_GETFD)); test_assert(0 == ioctl(pipe_fds[0], FIONCLEX)); test_assert(0 == fcntl(pipe_fds[0], F_GETFD)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/legacy_ugid.c000066400000000000000000000032701265436462100163400ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" /* We use this structure to verify that, on architectures supporting UID16 * syscalls, rr properly records and replays only 16-bit values. */ union legacy_id { uint16_t u16[2]; uint32_t u32; }; #define UID_COOKIE 0xd05e static void initialize_legacy_ids(size_t n, union legacy_id* ids) { size_t i; for (i = 0; i < n; ++i) { ids[i].u16[0] = 0; ids[i].u16[1] = UID_COOKIE; } }; static void verify_results(size_t n, union legacy_id* ids) { size_t i; for (i = 0; i < n; ++i) { #if defined(__i386__) // For UID16 syscall-supporting archs, the cookie should be intact. test_assert(ids[i].u16[1] == UID_COOKIE); #elif defined(__x86_64__) // For UID32 archs, assume that the user doesn't have a UID with the // upper bits equivalent to our cookie. This is not a great assumption, // but we don't really have anything better. 
test_assert(ids[i].u16[1] != UID_COOKIE); #else #error unknown architecture #endif } } int main(int argc, char* argv[]) { union legacy_id resuid_results[3]; union legacy_id resgid_results[3]; initialize_legacy_ids(ALEN(resuid_results), resuid_results); test_assert(0 == syscall(SYS_getresuid, &resuid_results[0], &resuid_results[1], &resuid_results[2])); verify_results(ALEN(resuid_results), resuid_results); initialize_legacy_ids(ALEN(resgid_results), resgid_results); test_assert(0 == syscall(SYS_getresgid, &resgid_results[0], &resgid_results[1], &resgid_results[2])); verify_results(ALEN(resgid_results), resgid_results); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/link.c000066400000000000000000000020021265436462100150110ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define TOKEN "ABC" #define TOKEN_SIZE sizeof(TOKEN) static const char token_file[] = "rr-link-file.txt"; static const char link_name[] = "rr-link-file.link"; void verify_token(int fd) { ssize_t len; char buf[TOKEN_SIZE]; len = read(fd, buf, sizeof(buf)); if (len != TOKEN_SIZE || strcmp(buf, TOKEN)) { atomic_puts("Internal error: FAILED: splice wrote the wrong data"); exit(1); } atomic_puts("Got expected token " TOKEN); } int main(void) { int fd; fd = open(token_file, O_RDWR | O_CREAT | O_TRUNC, 0600); write(fd, TOKEN, TOKEN_SIZE); close(fd); if (link(token_file, link_name)) { atomic_puts("Internal error: FAILED: link not created"); exit(1); } fd = open(link_name, O_RDONLY); verify_token(fd); close(fd); unlink(token_file); fd = open(link_name, O_RDONLY); verify_token(fd); close(fd); unlink(link_name); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/link.run000066400000000000000000000002401265436462100153750ustar00rootroot00000000000000source `dirname $0`/util.sh # NB: this test creates garbage, but it will be cleaned up along with # the other files in the tmp dir. 
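# compare_test (from util.sh) presumably records the test binary, replays
# the recording, checks for the token, and diffs record vs. replay output.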
compare_test 'EXIT-SUCCESS' rr-4.1.0/src/test/madvise.c000066400000000000000000000032711265436462100155150ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define PAGE_ZEROES (PAGE_SIZE / sizeof(int)) static int count_page_zeroes(int* p) { int zeroes = 0; int i; for (i = 0; i < PAGE_SIZE / sizeof(*p); ++i) { if (!p[i]) { ++zeroes; } } return zeroes; } static void set_page_values_nonzero(int* p) { int i; for (i = 0; i < PAGE_SIZE / sizeof(*p); ++i) { p[i] = i + 1; } } int main(int argc, char* argv[]) { int* page; void* fixed_area; fixed_area = mmap(NULL, PAGE_SIZE * 5, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); test_assert(fixed_area != MAP_FAILED); test_assert(0 == munmap(fixed_area, PAGE_SIZE * 5)); page = mmap(fixed_area + PAGE_SIZE, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); test_assert(page != MAP_FAILED); test_assert(count_page_zeroes(page) == PAGE_ZEROES); set_page_values_nonzero(page); test_assert(0 == madvise(page, PAGE_SIZE, MADV_DONTNEED)); test_assert(count_page_zeroes(page) == PAGE_ZEROES); set_page_values_nonzero(page); test_assert(0 == madvise(page, 1, MADV_DONTNEED)); test_assert(count_page_zeroes(page) == PAGE_ZEROES); set_page_values_nonzero(page); test_assert(-1 == madvise(fixed_area - 1, PAGE_SIZE * 5, MADV_DONTNEED)); test_assert(EINVAL == errno); /* check this madvise had no effect */ test_assert(count_page_zeroes(page) < PAGE_ZEROES); test_assert(-1 == madvise(fixed_area, PAGE_SIZE * 5, MADV_DONTNEED)); test_assert(ENOMEM == errno); /* check this madvise did take effect */ test_assert(count_page_zeroes(page) == PAGE_ZEROES); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/madvise_dontfork.c000066400000000000000000000014051265436462100174200ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) {} int main(int argc, char* argv[]) { char* page; pid_t pid; int status; page = mmap(NULL, PAGE_SIZE * 2, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); test_assert(page != MAP_FAILED); test_assert(0 == madvise(page, PAGE_SIZE, MADV_DONTFORK)); breakpoint(); page[0] = 1; pid = fork(); if (!pid) { test_assert(-1 == madvise(page, PAGE_SIZE, MADV_NORMAL)); test_assert(ENOMEM == errno); page[PAGE_SIZE] = 2; atomic_puts("EXIT-SUCCESS"); return 77; } test_assert(pid == wait(&status)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 77); return 0; } rr-4.1.0/src/test/madvise_dontfork.py000066400000000000000000000004151265436462100176260ustar00rootroot00000000000000from rrutil import * send_gdb('break breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('checkpoint') expect_gdb('= 1') send_gdb('next') send_gdb('restart 1') expect_gdb('stopped') send_gdb('c') expect_rr('EXIT-SUCCESS') ok() rr-4.1.0/src/test/madvise_dontfork.run000066400000000000000000000000471265436462100200030ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/main_thread_exit.c000066400000000000000000000010241265436462100173630ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static pthread_t main_thread; static void breakpoint(void) {} static void* start_thread(void* p) { test_assert(0 == pthread_join(main_thread, NULL)); breakpoint(); atomic_puts("EXIT-SUCCESS"); return NULL; } int main(int argc, char** argv) { 
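  /* The main thread exits via pthread_exit() below while 'thread' lives on;
   * start_thread() pthread_join()s the main thread and only then reports
   * success, so the process must survive the exit of its main thread. */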
pthread_t thread; main_thread = pthread_self(); test_assert(0 == pthread_create(&thread, NULL, start_thread, NULL)); pthread_exit(NULL); test_assert(0); return 0; } rr-4.1.0/src/test/main_thread_exit.py000066400000000000000000000003771265436462100176030ustar00rootroot00000000000000from rrutil import * send_gdb('break breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('checkpoint') send_gdb('n') send_gdb('restart 1') send_gdb('c') expect_rr('EXIT-SUCCESS') expect_gdb('exited normally') ok() rr-4.1.0/src/test/main_thread_exit.run000066400000000000000000000000471265436462100177510ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/map_fixed.c000066400000000000000000000011751265436462100160220ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { size_t page_size = sysconf(_SC_PAGESIZE); uint8_t* map1 = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); uint8_t* map1_end = map1 + page_size; uint8_t* map2; test_assert(map1 != (void*)-1); map2 = mmap(map1_end, page_size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); test_assert(map2 != (void*)-1); test_assert(map2 == map1_end); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/memfd_create.c000066400000000000000000000010641265436462100164760ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define TEST_MEMFD "foo" #ifndef MFD_CLOEXEC #define MFD_CLOEXEC 0x0001 #define MFD_ALLOW_SEALING 0x0002 #endif int main(int argc, char* argv[]) { int fd; /* There's no libc helper for this syscall. 
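 * (memfd_create landed in Linux 3.17 and glibc only wrapped it much later,
 * hence the raw syscall below using rr's own RR_memfd_create number,
 * presumably supplied by rrutil.h. With a libc wrapper this would simply be
 * fd = memfd_create(TEST_MEMFD, MFD_ALLOW_SEALING);.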
*/ fd = syscall(RR_memfd_create, TEST_MEMFD, MFD_ALLOW_SEALING); if (-1 == fd && ENOSYS == errno) { atomic_puts("SYS_memfd_create not supported on this kernel"); } else { test_assert(fd >= 0); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mincore.c000066400000000000000000000007561265436462100155260ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define BUF_SIZE 10 int main(int argc, char* argv[]) { unsigned char* buf; void* p = (void*)((long)&argc & ~(long)(PAGE_SIZE - 1)); ALLOCATE_GUARD(buf, 'q'); test_assert(0 == mincore(p, PAGE_SIZE, buf)); /* I guess we can't actually check mincore's results in any way */ VERIFY_GUARD(buf); atomic_printf("In-core=%d\n", *buf & 1); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mknod.c000066400000000000000000000010721265436462100151720ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define FILENAME "foo" #define MODE (S_IFIFO) int main(int argc, char* argv[]) { int result; struct stat* st; result = mknod(FILENAME, MODE, 0); test_assert(result == 0); ALLOCATE_GUARD(st, 'x'); test_assert(stat(FILENAME, st) == 0); test_assert(st->st_mode == MODE); FREE_GUARD(st); result = mknod(FILENAME, MODE, 0); test_assert(result < 0); test_assert(errno == EEXIST); unlink(FILENAME); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mlock.c000066400000000000000000000010401265436462100151620ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(void) { void* p = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); test_assert(p != MAP_FAILED); test_assert(0 == mlock(p, PAGE_SIZE) || errno == ENOMEM || errno == EPERM); test_assert(0 == munlock(p, PAGE_SIZE)); test_assert(0 == mlockall(MCL_CURRENT) || errno == ENOMEM || errno == EPERM); test_assert(0 == munlockall()); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mmap_discontinuous.c000066400000000000000000000022421265436462100200020ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int create_segment(size_t num_bytes) { char filename[] = "/dev/shm/rr-test-XXXXXX"; int fd = mkstemp(filename); unlink(filename); test_assert(fd >= 0); ftruncate(fd, num_bytes); return fd; } int main(int argc, char* argv[]) { size_t page_size = sysconf(_SC_PAGESIZE); int fd = create_segment(3 * page_size); uint8_t* wpage1 = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd, 0); uint8_t* wpage2 = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd, 2 * page_size); test_assert(wpage1 != (void*)-1 && wpage2 != (void*)-1); test_assert(wpage1 != wpage2); test_assert(wpage2 - wpage1 == page_size || wpage1 - wpage2 == page_size); wpage1 = mmap(NULL, page_size, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); wpage2 = mmap(NULL, page_size, PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 2 * page_size); test_assert(wpage1 != (void*)-1 && wpage2 != (void*)-1); test_assert(wpage1 != wpage2); test_assert(wpage2 - wpage1 == page_size || wpage1 - wpage2 == page_size); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mmap_private.c000066400000000000000000000016531265436462100165530ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) { int break_here = 1; 
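  /* (break_here exists only so gdb-driven tests have a real statement to
   * set a breakpoint on; the (void) cast on the next line keeps the
   * compiler from warning that it's otherwise unused.) */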
(void)break_here; } int main(int argc, char* argv[]) { size_t num_bytes = sysconf(_SC_PAGESIZE); int fd = open(argv[0], O_RDONLY); int* wpage; int* rpage; int i; test_assert(fd >= 0); breakpoint(); wpage = mmap(NULL, num_bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); breakpoint(); rpage = mmap(NULL, num_bytes, PROT_READ, MAP_PRIVATE, fd, 0); test_assert(wpage != (void*)-1 && rpage != (void*)-1 && rpage != wpage); breakpoint(); for (i = 0; i < num_bytes / sizeof(int); ++i) { int magic; test_assert(wpage[i] == rpage[i]); magic = rpage[i] * 31 + 3; wpage[i] = magic; test_assert(rpage[i] != magic && wpage[i] == magic); atomic_printf("%d:%d,", rpage[i], wpage[i]); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mmap_ro.c000066400000000000000000000011241265436462100155120ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define DUMMY_FILE "dummy.txt" int main(int argc, char* argv[]) { size_t num_bytes = sysconf(_SC_PAGESIZE); int fd = open(DUMMY_FILE, O_CREAT | O_EXCL | O_RDWR, 0600); int one = 1; int* rpage; test_assert(fd >= 0); test_assert(sizeof(one) == write(fd, &one, sizeof(one))); test_assert(0 == fchmod(fd, 0400)); rpage = mmap(NULL, num_bytes, PROT_READ, MAP_SHARED, fd, 0); test_assert(rpage != (void*)-1); unlink(DUMMY_FILE); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mmap_shared.c000066400000000000000000000033171265436462100163460ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int create_segment(size_t num_bytes) { char filename[] = "/dev/shm/rr-test-XXXXXX"; int fd = mkstemp(filename); unlink(filename); test_assert(fd >= 0); ftruncate(fd, num_bytes); return fd; } struct mmap_arg_struct { unsigned long addr; unsigned long len; unsigned long prot; unsigned long flags; unsigned long fd; unsigned long offset; }; static void run_test(void) { size_t num_bytes = sysconf(_SC_PAGESIZE); int fd = create_segment(num_bytes); int* wpage = mmap(NULL, num_bytes, PROT_WRITE, MAP_SHARED, fd, 0); int i; int* rpage; close(128); munmap(NULL, 0); #if defined(__i386__) struct mmap_arg_struct args; args.addr = 0; args.len = num_bytes; args.prot = PROT_READ; args.flags = MAP_SHARED; args.fd = fd; args.offset = 0; rpage = (int*)syscall(SYS_mmap, &args, -1, -1, -1, -1, -1); #elif defined(__x86_64__) rpage = (int*)syscall(SYS_mmap, 0, num_bytes, PROT_READ, MAP_SHARED, fd, (off_t)0); #else #error unknown architecture #endif test_assert(wpage != (void*)-1 && rpage != (void*)-1 && rpage != wpage); close(128); for (i = 0; i < num_bytes / sizeof(int); ++i) { wpage[i] = i; test_assert(rpage[i] == i); } } int main(int argc, char* argv[]) { pid_t c; int status; atomic_printf("%d: checking shared maps ...\n", getpid()); run_test(); if (0 == (c = fork())) { atomic_printf("%d: and in fork child ...\n", getpid()); run_test(); exit(0); } test_assert(c == waitpid(c, &status, 0) && WIFEXITED(status) && 0 == WEXITSTATUS(status)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mmap_shared_multiple.c000066400000000000000000000007711265436462100202620ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { char* p; char* q; p = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); test_assert(p != MAP_FAILED); q = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 
0); test_assert(q != MAP_FAILED); *p = 'a'; test_assert(*q == 0); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mmap_shared_prot.c000066400000000000000000000015731265436462100174140ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) {} int main(int argc, char* argv[]) { pid_t child; int status; char* p; /* Do a dummy waitpid so the real one doesn't go through the linker, patching etc */ waitpid(-2, NULL, 0); p = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); test_assert(p != MAP_FAILED); *p = 'a'; if ((child = fork()) == 0) { while (*(char*)p == 'a') { sched_yield(); } return 0; } test_assert(0 == mprotect(p, PAGE_SIZE, PROT_READ)); breakpoint(); test_assert(0 == mprotect(p, PAGE_SIZE, PROT_READ | PROT_WRITE)); *p = *p + 1; test_assert(*p == 'b'); test_assert(child == waitpid(child, &status, 0)); test_assert(0 == status); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mmap_shared_prot.py000066400000000000000000000004501265436462100176130ustar00rootroot00000000000000from rrutil import * send_gdb('break breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('check') expect_gdb('= 1') send_gdb('c') expect_gdb('exited normally') send_gdb('restart 1') expect_gdb('stopped') send_gdb('c') expect_gdb('exited normally') ok() rr-4.1.0/src/test/mmap_shared_prot.run000066400000000000000000000000471265436462100177710ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/mmap_shared_subpage.c000066400000000000000000000015521265436462100200530ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int create_segment(size_t num_bytes) { char filename[] = "/dev/shm/rr-test-XXXXXX"; int fd = mkstemp(filename); unlink(filename); test_assert(fd >= 0); ftruncate(fd, num_bytes); return fd; } int main(int argc, char* argv[]) { size_t num_bytes = 120; /* Not a multiple of the page size */ int fd = create_segment(num_bytes); int* wpage = mmap(NULL, num_bytes, PROT_WRITE, MAP_SHARED, fd, 0); int* rpage = mmap(NULL, num_bytes, PROT_READ, MAP_SHARED, fd, 0); int i; test_assert(wpage != (void*)-1 && rpage != (void*)-1 && rpage != wpage); close(128); for (i = 0; i < num_bytes / sizeof(int); ++i) { wpage[i] = i; test_assert(rpage[i] == i); atomic_printf("%d,", rpage[i]); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mmap_short_file.c000066400000000000000000000020321265436462100172270ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define DUMMY_FILE "dummy.txt" static void sighandler(int sig) { atomic_printf("caught signal %d, exiting\n", sig); atomic_puts("EXIT-SUCCESS"); _exit(0); } int main(int argc, char* argv[]) { size_t page_size = sysconf(_SC_PAGESIZE); int fd = open(DUMMY_FILE, O_CREAT | O_EXCL | O_RDWR, 0600); int one = 1; int* rpage; unlink(DUMMY_FILE); test_assert(fd >= 0); test_assert(sizeof(one) == write(fd, &one, sizeof(one))); rpage = mmap(NULL, page_size * 2, PROT_READ, MAP_PRIVATE, fd, 0); test_assert(rpage != (void*)-1); test_assert(*rpage == 1); signal(SIGSEGV, sighandler); signal(SIGBUS, sighandler); /* This should generate a SIGBUS, but the test will pass whether the kernel generates SIGBUS or SIGSEGV. rr checks that the same signal is produced during replay as during recording. 
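   (Background: the file holds a single int, so the second page of this
   two-page mapping lies entirely beyond EOF, and POSIX specifies SIGBUS
   for references to such pages.)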
*/ char ch = *((char*)rpage + page_size); atomic_printf("FAILED: no segfault, read %d", ch); return 0; } rr-4.1.0/src/test/mmap_tmpfs.c000066400000000000000000000011601265436462100162230ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define TEST_FILENAME "foo.so" #define TOKEN "hello kitty" int main(int argc, char* argv[]) { int fd = open(TEST_FILENAME, O_CREAT | O_EXCL | O_RDWR, 0700); char* bytes; write(fd, TOKEN, sizeof(TOKEN)); close(fd); fd = open(TEST_FILENAME, O_RDONLY); bytes = (char*)mmap(NULL, 4096, PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0); test_assert(bytes != MAP_FAILED); test_assert(!strcmp(bytes, TOKEN)); munmap(bytes, 4096); close(fd); unlink(TEST_FILENAME); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mmap_write.c000066400000000000000000000022151265436462100162260ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define DUMMY_FILE "dummy.txt" static const int magic = 0x5a5a5a5a; static void overwrite_file(const char* path, ssize_t num_bytes) { int fd = open(path, O_TRUNC | O_RDWR, 0600); int i; test_assert(fd >= 0); for (i = 0; i < num_bytes / sizeof(magic); ++i) { write(fd, &magic, sizeof(magic)); } close(fd); } int main(int argc, char* argv[]) { size_t num_bytes = sysconf(_SC_PAGESIZE); int fd = open(DUMMY_FILE, O_CREAT | O_EXCL | O_RDWR, 0600); int* rpage; int i; test_assert(fd >= 0); overwrite_file(DUMMY_FILE, num_bytes); rpage = mmap(NULL, num_bytes, PROT_READ, MAP_SHARED, fd, 0); atomic_printf("rpage:%p\n", rpage); test_assert(rpage != (void*)-1); for (i = 0; i < num_bytes / sizeof(magic); ++i) { test_assert(rpage[i] == magic); } lseek(fd, 0, SEEK_SET); for (i = 0; i < num_bytes / sizeof(i); ++i) { int written; write(fd, &i, sizeof(i)); written = rpage[i]; atomic_printf("(wrote %d, read %d)", i, written); test_assert(written == i); } atomic_puts(" done"); return 0; } rr-4.1.0/src/test/mmap_write.run000066400000000000000000000002061265436462100166060ustar00rootroot00000000000000source `dirname $0`/util.sh fails "write()s to SHARED mapped files aren't propagated to mapping during replay." 
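# fails marks this test as an expected failure: rr's replay does not write
# through to MAP_SHARED file mappings, so the mapped reads above would
# diverge from the recording.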
compare_test 'done' rr-4.1.0/src/test/mprotect.c000066400000000000000000000016051265436462100157210ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { size_t page_size = sysconf(_SC_PAGESIZE); uint8_t* map1 = mmap(NULL, 4 * page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); uint8_t* map1_end = map1 + 2 * page_size; uint8_t* map2; uint8_t* map2_end; test_assert(map1 != (void*)-1); atomic_printf("map1 = [%p, %p)\n", map1, map1_end); mprotect(map1 + page_size, page_size, PROT_NONE); map2 = mmap(map1_end, 2 * page_size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); map2_end = map2 + page_size; test_assert(map2 != (void*)-1); test_assert(map2 == map1_end); atomic_printf("map2 = [%p, %p)\n", map2, map2_end); mprotect(map2, page_size, PROT_NONE); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mprotect_growsdown.c000066400000000000000000000015101265436462100200250ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { char* p; /* map 3 pages since the first page will be made into a guard page by the kernel */ p = mmap(NULL, PAGE_SIZE * 3, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN, -1, 0); test_assert(p != MAP_FAILED); test_assert( 0 == mprotect(p + PAGE_SIZE * 2, PAGE_SIZE, PROT_NONE | PROT_GROWSDOWN)); test_assert(-1 == mprotect(p + 1, PAGE_SIZE, PROT_NONE | PROT_GROWSDOWN)); test_assert(EINVAL == errno); p = (char*)(((uintptr_t)main) & ~((uintptr_t)PAGE_SIZE - 1)); test_assert(-1 == mprotect(p, PAGE_SIZE, PROT_READ | PROT_EXEC | PROT_GROWSDOWN)); test_assert(EINVAL == errno); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mprotect_heterogenous.c000066400000000000000000000021771265436462100205150ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define TEST_FILE "foo.txt" int main(int argc, char* argv[]) { size_t page_size = sysconf(_SC_PAGESIZE); int fd = open(TEST_FILE, O_CREAT | O_EXCL | O_RDWR, 0600); uint8_t* pages; int ret, err; test_assert(fd >= 0); test_assert(0 == ftruncate(fd, 8 * page_size)); unlink(TEST_FILE); pages = mmap(NULL, 5 * page_size, PROT_WRITE, MAP_PRIVATE, fd, 0); test_assert(pages != (void*)-1); /* Protect second page. */ test_assert(0 == mprotect(pages + page_size, page_size, PROT_NONE)); /* Protect fourth page. */ test_assert(0 == mprotect(pages + 3 * page_size, page_size, PROT_NONE)); /* Protect all five pages. */ test_assert(0 == mprotect(pages, 5 * page_size, PROT_NONE)); /* Unmap second page. */ test_assert(0 == munmap(pages + page_size, page_size)); /* Fail to protect the entire region, because one page is * unmapped. 
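 * Note that Linux may still have applied the new protection to the leading
 * mapped pages before failing; mprotect(2) documents that on error the
 * protection of part of the range may already have been changed.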
*/ errno = 0; ret = mprotect(pages, 5 * page_size, PROT_READ | PROT_WRITE); err = errno; test_assert(-1 == ret && ENOMEM == err); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mprotect_none.c000066400000000000000000000031721265436462100167410ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { size_t page_size = sysconf(_SC_PAGESIZE); uint8_t* map_base; uint8_t* map; map_base = mmap(NULL, 5 * page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); test_assert(map_base != MAP_FAILED); test_assert(0 == munmap(map_base, 4 * page_size)); map = mmap(map_base + page_size, page_size, PROT_READ, MAP_ANONYMOUS | MAP_FIXED | MAP_PRIVATE, -1, 0); test_assert(map == map_base + page_size); test_assert(-1 == mprotect(map_base, 4 * page_size, PROT_READ | PROT_WRITE)); test_assert(ENOMEM == errno); test_assert(0 == munmap(map_base, 4 * page_size)); map = mmap(map_base + page_size, 2 * page_size, PROT_READ, MAP_ANONYMOUS | MAP_FIXED | MAP_PRIVATE, -1, 0); test_assert(map == map_base + page_size); test_assert(-1 == mprotect(map_base, 2 * page_size, PROT_READ | PROT_WRITE)); test_assert(ENOMEM == errno); map = mmap(map_base + 4 * page_size, page_size, PROT_READ, MAP_ANONYMOUS | MAP_FIXED | MAP_PRIVATE, -1, 0); /* The first mapped page will be mprotect'ed PROT_READ | PROT_WRITE and then it will return ENOMEM. */ test_assert(-1 == mprotect(map_base + 2 * page_size, 3 * page_size, PROT_READ | PROT_WRITE)); test_assert(ENOMEM == errno); map_base[2 * page_size] = 1; test_assert(0 == munmap(map_base + page_size, 2 * page_size)); test_assert(-1 == mprotect(map_base, 4 * page_size, PROT_READ | PROT_WRITE)); test_assert(ENOMEM == errno); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mprotect_stack.c000066400000000000000000000016211265436462100171040ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { size_t page_size = sysconf(_SC_PAGESIZE); uint8_t* map1 = mmap(NULL, 4 * page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); uint8_t* map1_end = map1 + 2 * page_size; uint8_t* map2; uint8_t* map2_end; test_assert(map1 != (void*)-1); atomic_printf("map1 = [%p, %p)\n", map1, map1_end); mprotect(map1 + page_size, page_size, PROT_NONE); map2 = mmap(map1_end, 2 * page_size, PROT_READ | PROT_WRITE, MAP_STACK | MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); map2_end = map2 + page_size; test_assert(map2 != (void*)-1); test_assert(map2 == map1_end); atomic_printf("map2 = [%p, %p)\n", map2, map2_end); mprotect(map2, page_size, PROT_NONE); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mremap.c000066400000000000000000000031751265436462100153510ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define DUMMY_FILE "dummy.txt" static void check_mapping(int* rpage, int* wpage, ssize_t nr_ints) { int i; for (i = 0; i < nr_ints; ++i) { test_assert(wpage[i] == rpage[i]); wpage[i] = i; test_assert(rpage[i] == i && wpage[i] == rpage[i]); } atomic_printf(" %p and %p point at the same resource\n", rpage, wpage); } static void overwrite_file(const char* path, ssize_t num_bytes) { const int magic = 0x5a5a5a5a; int fd = open(path, O_TRUNC | O_RDWR, 0600); int i; for (i = 0; i < num_bytes / sizeof(magic); ++i) { write(fd, &magic, sizeof(magic)); } close(fd); } int main(int argc, char* argv[]) 
{ size_t num_bytes = sysconf(_SC_PAGESIZE); char file_name[] = "/tmp/rr-test-mremap-XXXXXX"; int fd = mkstemp(file_name); int* wpage; int* rpage; int* old_wpage; test_assert(fd >= 0); overwrite_file(file_name, 2 * num_bytes); wpage = mmap(NULL, num_bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); rpage = mmap(NULL, num_bytes, PROT_READ, MAP_SHARED, fd, 0); atomic_printf("wpage:%p rpage:%p\n", wpage, rpage); test_assert(wpage != (void*)-1 && rpage != (void*)-1 && rpage != wpage); check_mapping(rpage, wpage, num_bytes / sizeof(*wpage)); overwrite_file(file_name, 2 * num_bytes); old_wpage = wpage; wpage = mremap(old_wpage, num_bytes, 2 * num_bytes, MREMAP_MAYMOVE); atomic_printf("remapped wpage:%p\n", wpage); test_assert(wpage != (void*)-1 && wpage != old_wpage); check_mapping(rpage, wpage, num_bytes / sizeof(*wpage)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mremap_shrink.c000066400000000000000000000007171265436462100167260ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { void* p = mmap(NULL, 3 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); void* p2 = mremap(p, 3 * PAGE_SIZE, 2 * PAGE_SIZE, 0); void* p3 = mremap(p, 2 * PAGE_SIZE, PAGE_SIZE, MREMAP_MAYMOVE); atomic_printf("%p %p %p\n", p, p2, p3); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/msg.c000066400000000000000000000076741265436462100146660ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define NUM_MSGS 3 struct msg { long mtype; long msg; }; static void breakpoint(void) { int break_here = 1; (void)break_here; } static int msqid; static void child(void) { const char* id = "c"; int i; int ret; int err; struct msg msg = { 0 }; atomic_printf("%s: inherited msg id %d\n", id, msqid); test_assert(msqid >= 0); for (i = 1; i < NUM_MSGS; ++i) { atomic_printf("%s: msgrcv() ...\n", id); ret = msgrcv(msqid, &msg, sizeof(msg.msg), i, 0); err = errno; atomic_printf("%s: ... returned %d (%s/%d): (%ld, %ld)\n", id, ret, strerror(err), err, msg.mtype, msg.msg); test_assert(sizeof(msg.msg) == ret); test_assert(msg.mtype + 1 == msg.msg); } atomic_printf("%s: awaiting Q destruction ...\n", id); ret = msgrcv(msqid, &msg, sizeof(msg.msg), i, 0); err = errno; atomic_printf("%s: ... returned %d (%s/%d)\n", id, ret, strerror(err), err); test_assert(-1 == ret && EIDRM == err); atomic_printf("%s: ... done\n", id); exit(0); } int main(int argc, char* argv[]) { const char* id = "P"; int ret; int err; pid_t c; struct msqid_ds buf; struct msginfo info; int i; struct msg msg = { 0 }; int status; breakpoint(); /* NB: no syscalls between here and |msgget()| below. */ /* NB: surprisingly, this test will leak Q's on failure, even * though we're using IPC_PRIVATE. There doesn't appear to be * a way to avoid that. 
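 * (Leaked queues can be listed by hand with ipcs(1) and removed with
 * ipcrm(1).)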
*/ msqid = msgget(IPC_PRIVATE, 0600); atomic_printf("%s: got id %d for key %d\n", id, msqid, IPC_PRIVATE); test_assert(msqid >= 0); memset(&buf, 0x5a, sizeof(buf)); test_assert(0 == msgctl(msqid, IPC_STAT, &buf)); atomic_printf("%s: Q stats in %p:\n" " { perm:%#o qnum:%ld lspid:%d lrpid:%d }\n", id, &buf, buf.msg_perm.mode, buf.msg_qnum, buf.msg_lspid, buf.msg_lrpid); memset(&info, 0x5a, sizeof(info)); ret = msgctl(msqid, IPC_INFO, (struct msqid_ds*)&info); err = errno; atomic_printf("%s: IPC_INFO returned %d (%s/%d):\n" " { max:%d mnb:%d mni:%d }\n", id, ret, strerror(err), err, info.msgmax, info.msgmnb, info.msgmni); test_assert(ret >= 0); memset(&info, 0x5a, sizeof(info)); ret = msgctl(msqid, MSG_INFO, (struct msqid_ds*)&info); err = errno; atomic_printf("%s: MSG_INFO returned %d (%s/%d):\n" " { pool:%d map:%d tql:%d }\n", id, ret, strerror(err), err, info.msgpool, info.msgmap, info.msgtql); test_assert(ret >= 0); if ((0 == (c = fork()))) { child(); test_assert("Not reached" && 0); } /* Make the child wait on msgrcv() a few times. */ for (i = 1; i < NUM_MSGS; ++i) { atomic_printf("%s: sleeping ...\n", id); usleep(500000); msg.mtype = i; msg.msg = msg.mtype + 1; atomic_printf("%s: sending msg (%ld, %ld) ...\n", id, msg.mtype, msg.msg); ret = msgsnd(msqid, &msg, sizeof(msg.msg), 0); err = errno; atomic_printf("%s: ... returned %d (%s/%d)\n", id, ret, strerror(err), err); test_assert(0 == ret); atomic_printf("%s: ... done\n", id); } memset(&buf, 0x5a, sizeof(buf)); test_assert(0 == msgctl(msqid, IPC_STAT, &buf)); atomic_printf("%s: Q stats: { perm:%#o qnum:%ld lspid:%d lrpid:%d }\n", id, buf.msg_perm.mode, buf.msg_qnum, buf.msg_lspid, buf.msg_lrpid); /* Make the child wait on msgrcv() returning EIDRM. */ atomic_printf("%s: sleeping ...\n", id); usleep(500000); atomic_printf("%s: destroying msg Q ...\n", id); test_assert(0 == msgctl(msqid, IPC_RMID, NULL)); atomic_printf("%s: ... done", id); atomic_printf("%s: joining %d ...\n", id, c); ret = waitpid(c, &status, 0); atomic_printf("%s: ... joined %d with status %#x\n", id, ret, status); test_assert(c == ret && WIFEXITED(status) && 0 == WEXITSTATUS(status)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/msync.c000066400000000000000000000015611265436462100152160ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define FILENAME "foo.txt" int main(int argc, char* argv[]) { size_t page_size = sysconf(_SC_PAGESIZE); int fd = open(FILENAME, O_CREAT | O_EXCL | O_RDWR, 0600); int* wpage; int i; int* rpage; unlink(FILENAME); test_assert(fd >= 0); ftruncate(fd, page_size); wpage = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd, 0); test_assert(wpage != (void*)-1); for (i = 0; i < page_size / sizeof(int); ++i) { wpage[i] = i; } rpage = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0); test_assert(rpage != (void*)-1 && wpage != rpage); msync(wpage, page_size, MS_INVALIDATE); for (i = 0; i < page_size / sizeof(int); ++i) { test_assert(rpage[i] == i); atomic_printf("%d,", rpage[i]); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/multiple_pending_signals.c000066400000000000000000000032201265436462100211360ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" /* In this test, usually SIGUSR1 will be delivered, and then as soon as we enter the SIGUSR1 handler, SIGUSR2 will be delivered and run nested inside the SIGUSR1 handler. 
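   (The nesting is possible because both handlers below are installed with
   an empty sa_mask, so SIGUSR2 remains deliverable while the SIGUSR1
   handler runs; the _sequential variant of this test instead blocks each
   signal in the other's handler to force back-to-back delivery.)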
This doesn't usually exercise multiple pending signals within rr itself, because the kernel doesn't notify rr of the second signal until rr has injected the first signal. */ static int to_child[2]; static int from_child[2]; static void* run_thread(void* p) { char ch; sigset_t s; sigemptyset(&s); sigaddset(&s, SIGUSR1); sigaddset(&s, SIGUSR2); sigprocmask(SIG_SETMASK, &s, NULL); /* yield to the main thread to minimize the chance of a context switch during the following two syscalls */ test_assert(1 == read(to_child[0], &ch, 1)); test_assert('J' == ch); kill(getpid(), SIGUSR1); kill(getpid(), SIGUSR2); test_assert(1 == write(from_child[1], "K", 1)); return NULL; } static void handler(int sig, siginfo_t* si, void* p) { atomic_printf("Handling signal %s\n", sig == SIGUSR1 ? "SIGUSR1" : "SIGUSR2"); } int main(int argc, char* argv[]) { pthread_t t; char ch; struct sigaction sa; test_assert(0 == pipe(to_child)); test_assert(0 == pipe(from_child)); sa.sa_sigaction = handler; sigemptyset(&sa.sa_mask); sa.sa_flags = SA_SIGINFO | SA_RESTART; sigaction(SIGUSR1, &sa, NULL); sigaction(SIGUSR2, &sa, NULL); pthread_create(&t, NULL, run_thread, NULL); test_assert(1 == write(to_child[1], "J", 1)); test_assert(1 == read(from_child[0], &ch, 1)); test_assert('K' == ch); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/multiple_pending_signals_sequential.c000066400000000000000000000034711265436462100234000ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" /* In this test, usually SIGUSR1 will be delivered, the SIGUSR1 handler will run, and when the handler returns, SIGUSR2 will be delivered and its handler will run. This doesn't usually exercise multiple pending signals within rr itself, because the kernel doesn't notify rr of the second signal until rr has injected the first signal. */ static int to_child[2]; static int from_child[2]; static void* run_thread(void* p) { char ch; sigset_t s; sigemptyset(&s); sigaddset(&s, SIGUSR1); sigaddset(&s, SIGUSR2); sigprocmask(SIG_SETMASK, &s, NULL); /* yield to the main thread to minimize the chance of a context switch during the following two syscalls */ test_assert(1 == read(to_child[0], &ch, 1)); test_assert('J' == ch); kill(getpid(), SIGUSR1); kill(getpid(), SIGUSR2); return NULL; } static int handler_count; static void handler(int sig, siginfo_t* si, void* p) { atomic_printf("Handling signal %s\n", sig == SIGUSR1 ? 
"SIGUSR1" : "SIGUSR2"); ++handler_count; if (handler_count == 2) { test_assert(1 == write(from_child[1], "K", 1)); } } int main(int argc, char* argv[]) { pthread_t t; char ch; struct sigaction sa; test_assert(0 == pipe(to_child)); test_assert(0 == pipe(from_child)); sa.sa_sigaction = handler; sigemptyset(&sa.sa_mask); sigaddset(&sa.sa_mask, SIGUSR2); sa.sa_flags = SA_SIGINFO | SA_RESTART; sigaction(SIGUSR1, &sa, NULL); sigemptyset(&sa.sa_mask); sigaddset(&sa.sa_mask, SIGUSR1); sigaction(SIGUSR2, &sa, NULL); pthread_create(&t, NULL, run_thread, NULL); test_assert(1 == write(to_child[1], "J", 1)); test_assert(1 == read(from_child[0], &ch, 1)); test_assert('K' == ch); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/munmap_discontinuous.c000066400000000000000000000014601265436462100203460ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define TEST_FILE "foo.txt" int main(int argc, char* argv[]) { size_t page_size = sysconf(_SC_PAGESIZE); int fd = open(TEST_FILE, O_CREAT | O_EXCL | O_RDWR, 0600); uint8_t* pages; test_assert(fd >= 0); test_assert(0 == ftruncate(fd, 8 * page_size)); unlink(TEST_FILE); pages = mmap(NULL, 8 * page_size, PROT_WRITE, MAP_PRIVATE, fd, 0); test_assert(pages != (void*)-1); /* Unmap first page. */ munmap(pages, page_size); /* Unmap third page. */ munmap(pages + 2 * page_size, page_size); /* Unmap fifth page. */ munmap(pages + 4 * page_size, page_size); /* Unmap first 6 page locations, leave last 2. */ munmap(pages, 6 * page_size); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/munmap_segv.c000066400000000000000000000007511265436462100164060ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void sighandler(int sig) { atomic_puts("EXIT-SUCCESS"); _exit(0); } int main(int argc, char* argv[]) { char* p = (char*)mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); test_assert(p != MAP_FAILED); signal(SIGSEGV, sighandler); *p = 'a'; test_assert(0 == munmap(p, PAGE_SIZE)); *p = 'b'; return 0; } rr-4.1.0/src/test/mutex_pi_stress.c000066400000000000000000000020321265436462100173140ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define NUM_THREADS 10 #define NUM_TRIALS 1000 static pthread_mutex_t lock; static void* thread(void* idp) { int tid = (intptr_t)idp; int i; atomic_printf("thread %d starting ...\n", tid); for (i = 0; i < NUM_TRIALS; ++i) { pthread_mutex_lock(&lock); sched_yield(); pthread_mutex_unlock(&lock); } atomic_printf(" ... thread %d done.\n", tid); return NULL; } int main(int argc, char* argv[]) { pthread_mutexattr_t attr; pthread_t threads[NUM_THREADS]; int i; pthread_mutexattr_init(&attr); test_assert(0 == pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT)); test_assert(0 == pthread_mutex_init(&lock, &attr)); for (i = 0; i < NUM_THREADS; ++i) { test_assert(0 == pthread_create(&threads[i], NULL, thread, (void*)(intptr_t)i)); } for (i = 0; i < NUM_THREADS; ++i) { test_assert(0 == pthread_join(threads[i], NULL)); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/mutex_pi_stress.run000066400000000000000000000001741265436462100177030ustar00rootroot00000000000000source `dirname $0`/util.sh # Switch threads very eagerly on recorded events. 
RECORD_ARGS="-e1" compare_test EXIT-SUCCESS rr-4.1.0/src/test/nanosleep.c000066400000000000000000000005301265436462100160440ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { test_assert(argc == 2); int sleep_secs = atoi(argv[1]); struct timespec ts = {.tv_sec = sleep_secs }; atomic_puts("sleeping"); nanosleep(&ts, NULL); atomic_puts("EXIT-SUCCESS"); return 1; } rr-4.1.0/src/test/nanosleep.run000066400000000000000000000001451265436462100164300ustar00rootroot00000000000000source `dirname $0`/util.sh record $TESTNAME 1 # 1 second replay check EXIT-SUCCESS rr-4.1.0/src/test/no_mask_timeslice.c000066400000000000000000000012761265436462100175550ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int pseudospinlock; static pthread_barrier_t bar; static void* thread(void* unused) { pthread_barrier_wait(&bar); sched_yield(); pseudospinlock = 1; return NULL; } int main(int argc, char* argv[]) { sigset_t old, mask; pthread_t t; pthread_barrier_init(&bar, NULL, 2); test_assert(0 == pthread_create(&t, NULL, thread, NULL)); sigfillset(&mask); pthread_sigmask(SIG_BLOCK, &mask, &old); pthread_barrier_wait(&bar); while (!pseudospinlock) ; pthread_sigmask(SIG_SETMASK, &old, NULL); pthread_join(t, NULL); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/numa.c000066400000000000000000000017101265436462100150210ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define MPOL_DEFAULT 0 #define MPOL_PREFERRED 1 #define MPOL_BIND 2 #define MPOL_INTERLEAVE 3 #define MPOL_MF_STRICT 0x1 #define MPOL_MF_MOVE 0x2 #define MPOL_MF_MOVE_ALL 0x4 #define MPOL_F_STATIC_NODES (1 << 15) #define MPOL_F_RELATIVE_NODES (1 << 14) static long mbind(void* start, unsigned long len, int mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); } int main(int argc, char** argv) { void* p = mmap(NULL, 16 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); int ret; test_assert(p != MAP_FAILED); ret = mbind(p, 16 * PAGE_SIZE, MPOL_PREFERRED, NULL, 0, MPOL_MF_MOVE); test_assert(ret == 0 || (ret == -1 && errno == ENOSYS)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/old_fork.c000066400000000000000000000006051265436462100156620ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t child; int status; if (0 == (child = syscall(SYS_fork))) { return 11; } test_assert(child == waitpid(child, &status, 0)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 11); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/orphan_process.c000066400000000000000000000020201265436462100171010ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { /* Descendant process will write one byte to the pipe to signal that it's * complete */ int pipe_fds[2]; int err = pipe(pipe_fds); pid_t child_pid, grandchild_pid, greatgrandchild_pid, dyingchild_pid; test_assert(err == 0); child_pid = fork(); if (child_pid) { char buf; int n = read(pipe_fds[0], &buf, 1); test_assert(n == 1); return 0; } /* In child */ dyingchild_pid = getpid(); grandchild_pid 
= fork(); if (grandchild_pid) { exit(0); } /* In grandchild */ /* Wait for parent to die */ while (getppid() == dyingchild_pid) { sched_yield(); } /* Now the rr supervisor process is no longer our ancestor in the process tree. Try forking again. */ greatgrandchild_pid = fork(); if (greatgrandchild_pid) { exit(0); } /* In great-grandchild */ atomic_puts("EXIT-SUCCESS"); write(pipe_fds[1], "a", 1); return 0; } rr-4.1.0/src/test/parent_no_break_child_bkpt.py000066400000000000000000000004521265436462100216050ustar00rootroot00000000000000from rrutil import * def observe_normal_parent_exit(): expect_rr('EXIT-SUCCESS') expect_gdb(r'Inferior 1 \(process \d+\) exited normally') send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') observe_normal_parent_exit() restart_replay() observe_normal_parent_exit() ok() rr-4.1.0/src/test/parent_no_break_child_bkpt.run000066400000000000000000000001351265436462100217570ustar00rootroot00000000000000source `dirname $0`/util.sh record fork_child_crash$bitness debug parent_no_break_child_bkpt rr-4.1.0/src/test/parent_no_stop_child_crash.py000066400000000000000000000003651265436462100216510ustar00rootroot00000000000000from rrutil import * def observe_normal_parent_exit(): expect_rr('EXIT-SUCCESS') expect_gdb(r'Inferior 1 \(process \d+\) exited normally') send_gdb('c') observe_normal_parent_exit() restart_replay() observe_normal_parent_exit() ok() rr-4.1.0/src/test/parent_no_stop_child_crash.run000066400000000000000000000001351265436462100220200ustar00rootroot00000000000000source `dirname $0`/util.sh record fork_child_crash$bitness debug parent_no_stop_child_crash rr-4.1.0/src/test/pause.c000066400000000000000000000010441265436462100151760ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int caught_signal; static void handle_signal(int sig) { ++caught_signal; } int main(int argc, char* argv[]) { int err; signal(SIGALRM, handle_signal); alarm(1); atomic_puts("set alarm for 1 sec from now; pausing ..."); pause(); err = errno; atomic_printf(" ... 
woke up with errno %s(%d)\n", strerror(err), err); test_assert(1 == caught_signal); test_assert(EINTR == err); atomic_puts("EXIT-SUCCESS"); return 1; } rr-4.1.0/src/test/perf_event.c000066400000000000000000000021031265436462100162130ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int counter_fd; static int sys_perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu, int group_fd, unsigned long flags) { return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags); } static uint64_t get_desched(void) { uint64_t nr_desched; test_assert(sizeof(nr_desched) == read(counter_fd, &nr_desched, sizeof(nr_desched))); return nr_desched; } int main(int argc, char* argv[]) { struct perf_event_attr attr; int i; memset(&attr, 0, sizeof(attr)); attr.size = sizeof(attr); attr.type = PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_CONTEXT_SWITCHES; counter_fd = sys_perf_event_open(&attr, 0 /*self*/, -1 /*any cpu*/, -1, 0); test_assert(0 <= counter_fd); atomic_printf("num descheds: %" PRIu64 "\n", get_desched()); for (i = 0; i < 5; ++i) { sched_yield(); atomic_printf("after yield: %" PRIu64 "\n", get_desched()); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/personality.c000066400000000000000000000004431265436462100164340ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #include <sys/personality.h> int main(int argc, char* argv[]) { personality(PER_LINUX); test_assert(personality(0xffffffff) == PER_LINUX); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/poll_sig_race.c000066400000000000000000000016651265436462100166720ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define NUM_ITERATIONS 10 int main(int argc, char* argv[]) { int fds[2]; struct pollfd pfd; int i; pipe2(fds, O_NONBLOCK); pfd.fd = fds[0]; pfd.events = POLLIN; for (i = 0; i < NUM_ITERATIONS; ++i) { char c; int ret; atomic_printf("iteration %d\n", i); if (0 == fork()) { usleep(250000); write(fds[1], "x", 1); return 0; } /* wait for 1 second, which should be long enough for the child to do its write. In extreme cases the child might run to completion before this poll() call is entered, in which case we will time out safely. */ ret = poll(&pfd, 1, 1000); if (ret == 0) { continue; } test_assert(1 == ret); test_assert(POLLIN & pfd.revents); test_assert(1 == read(pfd.fd, &c, 1)); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/prctl.c000066400000000000000000000037021265436462100152100ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { char setname[16] = "prctl-test"; char getname[16]; unsigned long slack = sizeof(unsigned long) == 4 ? 
1024 * 1024 * 1024 : (unsigned long)(1024LL * 1024 * 1024 * 8); int sig = 99; int tsc = 99; int dummy; test_assert(0 == prctl(PR_SET_KEEPCAPS, 0)); test_assert(0 == prctl(PR_GET_KEEPCAPS)); test_assert(0 == prctl(PR_SET_KEEPCAPS, 1)); test_assert(1 == prctl(PR_GET_KEEPCAPS)); test_assert(0 == prctl(PR_SET_NAME, setname)); test_assert(0 == prctl(PR_GET_NAME, getname)); atomic_printf("set name `%s'; got name `%s'\n", setname, getname); test_assert(!strcmp(getname, setname)); test_assert(0 == prctl(PR_SET_DUMPABLE, 0)); test_assert(0 == prctl(PR_GET_DUMPABLE)); test_assert(0 == prctl(PR_SET_DUMPABLE, 1)); test_assert(1 == prctl(PR_GET_DUMPABLE)); test_assert(0 == prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); test_assert(1 == prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0)); test_assert(0 == prctl(PR_SET_TIMERSLACK, slack)); /* prctl coerces its result to int */ test_assert((int)slack == prctl(PR_GET_TIMERSLACK)); test_assert(0 == prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0)); test_assert(PR_MCE_KILL_EARLY == prctl(PR_MCE_KILL_GET, 0, 0, 0, 0)); test_assert(-1 == prctl(PR_GET_ENDIAN, &dummy) && errno == EINVAL); test_assert(-1 == prctl(PR_GET_FPEMU, &dummy) && errno == EINVAL); test_assert(-1 == prctl(PR_GET_FPEXC, &dummy) && errno == EINVAL); test_assert(-1 == prctl(PR_GET_UNALIGN, &dummy) && errno == EINVAL); test_assert(0 == prctl(PR_GET_PDEATHSIG, (unsigned long)&sig)); test_assert(sig == 0); test_assert(0 == prctl(PR_GET_TSC, (unsigned long)&tsc)); test_assert(tsc == PR_TSC_ENABLE); test_assert(0 == prctl(PR_GET_SECCOMP)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/prctl_deathsig.c000066400000000000000000000014531265436462100170610ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int child_to_main_fds[2]; static void handle_signal(int sig) { test_assert(sig == SIGILL); atomic_puts("EXIT-SUCCESS"); exit(0); } static int run_child(void) { int sig = 99; char ch = 'x'; signal(SIGILL, handle_signal); test_assert(0 == prctl(PR_SET_PDEATHSIG, SIGILL)); test_assert(0 == prctl(PR_GET_PDEATHSIG, (unsigned long)&sig)); test_assert(sig == SIGILL); test_assert(1 == write(child_to_main_fds[1], &ch, 1)); sleep(1000000); test_assert(0); return 0; } int main(int argc, char* argv[]) { char ch; test_assert(0 == pipe(child_to_main_fds)); if (!fork()) { return run_child(); } test_assert(1 == read(child_to_main_fds[0], &ch, 1)); return 0; } rr-4.1.0/src/test/prctl_name.c000066400000000000000000000044211265436462100162070ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" /* Max name length is 16 bytes, *without* null terminator. 
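   (The kernel stores the name in a 16-byte comm field, TASK_COMM_LEN, and
   silently truncates anything longer.)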
*/ #define PRNAME_NUM_BYTES 16 const char* exe_image; const char main_name[] = "main"; const char thread_name[] = "thread"; const char fork_child_name[] = "fchild"; const char exec_child_name[] = "echild"; static void assert_prname_is(const char* tag, const char* name) { char prname[PRNAME_NUM_BYTES] = { 0 }; test_assert(0 == prctl(PR_GET_NAME, prname)); atomic_printf("%s: prname is '%s'; expecting '%s'\n", tag, prname, name); test_assert(!strcmp(prname, name)); } static void* thread(void* unused) { pid_t child; assert_prname_is("thread", main_name); prctl(PR_SET_NAME, thread_name); assert_prname_is("thread", thread_name); if ((child = fork())) { int status; test_assert(child == waitpid(child, &status, 0)); test_assert(WIFEXITED(status) && 0 == WEXITSTATUS(status)); assert_prname_is("thread", thread_name); return NULL; } assert_prname_is("fork child", thread_name); prctl(PR_SET_NAME, fork_child_name); assert_prname_is("fork child", fork_child_name); execl(exe_image, exe_image, "exec child", NULL); test_assert("Not reached" && 0); return NULL; } char initial_name[PRNAME_NUM_BYTES] = { 0 }; static void compute_initial_name(const char* exe_image) { const char* basename = strrchr(exe_image, '/'); if (basename) { /* Eat the '/' character. */ ++basename; } else { /* Image path is already a basename. */ basename = exe_image; } atomic_printf(" (basename of exe path '%s' is '%s')\n", exe_image, basename); strncpy(initial_name, basename, sizeof(initial_name) - 1); } int main(int argc, char* argv[]) { pthread_t t; exe_image = argv[0]; compute_initial_name(exe_image); if (2 == argc) { assert_prname_is("exec child", initial_name); prctl(PR_SET_NAME, exec_child_name); assert_prname_is("exec child", exec_child_name); return 0; } assert_prname_is("main", initial_name); prctl(PR_SET_NAME, main_name); assert_prname_is("main", main_name); test_assert(0 == pthread_create(&t, NULL, thread, NULL)); pthread_join(t, NULL); assert_prname_is("main", main_name); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/priority.c000066400000000000000000000010451265436462100157430ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(void) { int prio1, prio2; prio1 = getpriority(PRIO_PROCESS, 0); atomic_printf("Current process priority: %d\n", prio1); if (prio1 < 19) { /* If it's less than 19, we can decrease the * priority. */ ++prio1; } setpriority(PRIO_PROCESS, 0, prio1); prio2 = getpriority(PRIO_PROCESS, 0); test_assert(prio1 == prio2); atomic_printf("Now priority is: %d\n", prio2); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/priority.run000066400000000000000000000001551265436462100163260ustar00rootroot00000000000000source `dirname $0`/util.sh renice -n 1 $$ && record $TESTNAME renice -n 2 $$ && replay check 'EXIT-SUCCESS' rr-4.1.0/src/test/protect_rr_fds.c000066400000000000000000000026031265436462100171020ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t child; int status; int ret; int fd; int pipe_fds[2]; struct rlimit nofile; if (argc == 2) { atomic_puts("EXIT-SUCCESS"); return 77; } /* Various spawning APIs try to close all open file descriptors before exec --- via direct close(), or by setting CLOEXEC. Check that those don't interfere with rr by closing RR_RESERVED_ROOT_DIR_FD or some other essential file descriptor. 
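   The loop below therefore sweeps every fd up to RLIMIT_NOFILE and hits
   each one with F_SETFD/dup2/dup3/close, accepting either success or EBADF;
   rr's private descriptors must keep working regardless.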
*/ test_assert(0 == getrlimit(RLIMIT_NOFILE, &nofile)); for (fd = STDERR_FILENO + 1; fd < nofile.rlim_cur; ++fd) { ret = fcntl(fd, F_SETFD, FD_CLOEXEC); test_assert(ret == 0 || (ret == -1 && errno == EBADF)); ret = dup2(STDERR_FILENO, fd); test_assert(ret == fd || (ret == -1 && errno == EBADF)); ret = dup3(STDERR_FILENO, fd, O_CLOEXEC); test_assert(ret == fd || (ret == -1 && errno == EBADF)); ret = close(fd); test_assert(ret == 0 || (ret == -1 && errno == EBADF)); } /* Check that syscall buffering still works */ test_assert(0 == pipe(pipe_fds)); test_assert(1 == write(pipe_fds[1], "c", 1)); if (0 == (child = fork())) { execl(argv[0], argv[0], "step2", NULL); } test_assert(child == waitpid(child, &status, 0)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 77); return 0; } rr-4.1.0/src/test/prw.c000066400000000000000000000013461265436462100146760ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int fd = open("prw.txt", O_CREAT | O_RDWR, 0600); const char content[] = "01234567890\nhello there\n"; char buf[sizeof(content)]; ssize_t nr; memset(buf, '?', sizeof(buf)); nr = write(fd, buf, sizeof(buf)); test_assert(nr == sizeof(buf)); nr = write(fd, buf, 10); test_assert(nr == 10); nr = pwrite(fd, content, sizeof(content), 10); test_assert(nr == sizeof(content)); atomic_printf("Wrote ```%s'''\n", content); nr = pread(fd, buf, sizeof(buf), 10); test_assert(nr == sizeof(content)); atomic_printf("Read ```%s'''\n", buf); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/pthread_condvar_locking.c000066400000000000000000000015061265436462100207350ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static pthread_cond_t condvar = PTHREAD_COND_INITIALIZER; static pthread_mutex_t mutex; static void* start_thread(void* p) { while (1) { sched_yield(); pthread_mutex_lock(&mutex); pthread_cond_signal(&condvar); pthread_mutex_unlock(&mutex); } return NULL; } int main(int argc, char** argv) { pthread_mutexattr_t attr; pthread_t thread; pthread_mutexattr_init(&attr); pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); pthread_mutex_init(&mutex, &attr); pthread_create(&thread, NULL, start_thread, NULL); pthread_mutex_lock(&mutex); pthread_cond_wait(&condvar, &mutex); pthread_cond_wait(&condvar, &mutex); pthread_mutex_unlock(&mutex); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/pthread_rwlocks.c000066400000000000000000000013771265436462100172650ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER; static int pipe_fds[2]; static void* start_thread(void* p) { pthread_rwlock_rdlock(&lock); pthread_rwlock_unlock(&lock); test_assert(1 == write(pipe_fds[1], "x", 1)); pthread_rwlock_wrlock(&lock); pthread_rwlock_unlock(&lock); return NULL; } int main(int argc, char** argv) { pthread_t thread; char ch; test_assert(0 == pipe(pipe_fds)); pthread_rwlock_rdlock(&lock); pthread_create(&thread, NULL, start_thread, NULL); test_assert(1 == read(pipe_fds[0], &ch, 1)); pthread_rwlock_unlock(&lock); pthread_join(thread, NULL); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/ptrace.c000066400000000000000000000053231265436462100153430ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define 
NEW_VALUE 0xabcdef static size_t static_data = 0x12345678; int main(int argc, char* argv[]) { pid_t child; int status; struct user_regs_struct* regs; struct user_fpregs_struct* fpregs; #ifdef __i386__ struct user_fpxregs_struct* fpxregs; #endif int pipe_fds[2]; test_assert(0 == pipe(pipe_fds)); if (0 == (child = fork())) { char ch; read(pipe_fds[0], &ch, 1); test_assert(static_data == NEW_VALUE); return 77; } test_assert(0 == ptrace(PTRACE_ATTACH, child, NULL, NULL)); test_assert(child == waitpid(child, &status, 0)); test_assert(status == ((SIGSTOP << 8) | 0x7f)); ALLOCATE_GUARD(regs, 0xFF); test_assert(0 == ptrace(PTRACE_GETREGS, child, NULL, regs)); #if defined(__i386__) test_assert((int32_t)regs->eip != -1); test_assert((int32_t)regs->esp != -1); #elif defined(__x86_64__) test_assert((int64_t)regs->rip != -1); test_assert((int64_t)regs->rsp != -1); #else #error unknown architecture #endif VERIFY_GUARD(regs); ALLOCATE_GUARD(fpregs, 0xBB); test_assert(0 == ptrace(PTRACE_GETFPREGS, child, NULL, fpregs)); test_assert(NULL == memchr(fpregs, 0xBB, sizeof(*fpregs))); VERIFY_GUARD(fpregs); #ifdef __i386__ ALLOCATE_GUARD(fpxregs, 0xCC); test_assert(0 == ptrace(PTRACE_GETFPXREGS, child, NULL, fpxregs)); test_assert(NULL == memchr(fpxregs, 0xCC, sizeof(*fpxregs))); VERIFY_GUARD(fpxregs); #endif test_assert(static_data == ptrace(PTRACE_PEEKDATA, child, &static_data, NULL)); test_assert(0 == ptrace(PTRACE_POKEDATA, child, &static_data, (void*)NEW_VALUE)); test_assert(NEW_VALUE == ptrace(PTRACE_PEEKDATA, child, &static_data, NULL)); /* Test invalid locations */ test_assert(-1 == ptrace(PTRACE_PEEKDATA, child, NULL, NULL)); test_assert(errno == EIO || errno == EFAULT); test_assert(-1 == ptrace(PTRACE_POKEDATA, child, NULL, (void*)NEW_VALUE)); test_assert(errno == EIO || errno == EFAULT); test_assert(regs->eflags == ptrace(PTRACE_PEEKUSER, child, (void*)offsetof(struct user, regs.eflags), NULL)); test_assert(0 == ptrace(PTRACE_PEEKUSER, child, (void*)offsetof(struct user, u_debugreg[0]), NULL)); test_assert(0 == ptrace(PTRACE_PEEKUSER, child, (void*)offsetof(struct user, u_debugreg[7]), NULL)); test_assert(0 == ptrace(PTRACE_DETACH, child, NULL, NULL)); test_assert(1 == write(pipe_fds[1], "x", 1)); test_assert(child == waitpid(child, &status, 0)); test_assert(WIFEXITED(status)); test_assert(WEXITSTATUS(status) == 77); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/ptrace.run000066400000000000000000000001311265436462100157150ustar00rootroot00000000000000source `dirname $0`/util.sh fails "There's no meaningful ptrace() support to test yet." 
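# ptrace.c above drives PTRACE_ATTACH/GETREGS/GETFPREGS/PEEKDATA/POKEDATA/PEEKUSER against a forked child; the `fails` marker records that rr does not yet emulate enough of ptrace for this to pass under recording.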
rr-4.1.0/src/test/ptrace_attach_null_status.c000066400000000000000000000007471265436462100213310ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t child; struct timespec ts = { 0, 50000000 }; if (0 == (child = fork())) { sleep(1000000); return 77; } nanosleep(&ts, NULL); test_assert(0 == ptrace(PTRACE_ATTACH, child, NULL, NULL)); test_assert(child == waitpid(child, NULL, 0)); test_assert(0 == kill(child, SIGKILL)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/ptrace_attach_running.c000066400000000000000000000010541265436462100204240ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t child; int status; struct timespec ts = { 0, 50000000 }; if (0 == (child = fork())) { while (1) { } return 77; } nanosleep(&ts, NULL); test_assert(0 == ptrace(PTRACE_ATTACH, child, NULL, NULL)); test_assert(child == waitpid(child, &status, 0)); test_assert(status == ((SIGSTOP << 8) | 0x7f)); test_assert(0 == kill(child, SIGKILL)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/ptrace_attach_sleeping.c000066400000000000000000000010521265436462100205500ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t child; int status; struct timespec ts = { 0, 50000000 }; if (0 == (child = fork())) { sleep(1000000); return 77; } nanosleep(&ts, NULL); test_assert(0 == ptrace(PTRACE_ATTACH, child, NULL, NULL)); test_assert(child == waitpid(child, &status, 0)); test_assert(status == ((SIGSTOP << 8) | 0x7f)); test_assert(0 == kill(child, SIGKILL)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/ptrace_attach_stopped.c000066400000000000000000000010631265436462100204220ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t child; int status; struct timespec ts = { 0, 50000000 }; if (0 == (child = fork())) { kill(getpid(), SIGSTOP); return 77; } nanosleep(&ts, NULL); test_assert(0 == ptrace(PTRACE_ATTACH, child, NULL, NULL)); test_assert(child == waitpid(child, &status, 0)); test_assert(status == ((SIGSTOP << 8) | 0x7f)); test_assert(0 == kill(child, SIGKILL)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/ptrace_attach_thread_running.c000066400000000000000000000023561265436462100217610ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int pipe_fds[2]; static void* child_thread(void* p) { char ch; test_assert(1 == read(pipe_fds[0], &ch, 1)); test_assert(ch == 'K'); exit(77); return NULL; } static void* child_thread_running(void* p) { while (1) { } return NULL; } static void run_child(void) { struct timespec ts = { 0, 1000000000 }; pthread_t t; pthread_create(&t, NULL, child_thread, NULL); /* try to get the kernel to deliver signals sent to our pid to some other thread */ pthread_create(&t, NULL, child_thread_running, NULL); nanosleep(&ts, NULL); } int main(int argc, char* argv[]) { pid_t child; int status; struct timespec ts = { 0, 50000000 }; test_assert(0 == pipe(pipe_fds)); if (0 == (child = fork())) { run_child(); } nanosleep(&ts, NULL); test_assert(0 == ptrace(PTRACE_ATTACH, child, NULL, NULL)); test_assert(child == waitpid(child, 
&status, 0)); test_assert(status == ((SIGSTOP << 8) | 0x7f)); test_assert(1 == write(pipe_fds[1], "K", 1)); test_assert(child == waitpid(child, &status, 0)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 77); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/ptrace_signals.c000066400000000000000000000033721265436462100170650ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static char* p; static void sighandler(int sig) { p[1] = 78; signal(SIGSEGV, SIG_DFL); } int main(int argc, char* argv[]) { pid_t child; int status; int pipe_fds[2]; p = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); test_assert(MAP_FAILED != p); p[0] = 0; p[1] = 0; test_assert(0 == pipe(pipe_fds)); if (0 == (child = fork())) { char ch; read(pipe_fds[0], &ch, 1); signal(SIGSEGV, sighandler); p[0] = 77; /* trigger SIGSEGV */ *(char*)NULL = 0; return 77; } test_assert(0 == ptrace(PTRACE_ATTACH, child, NULL, NULL)); test_assert(child == waitpid(child, &status, 0)); test_assert(status == ((SIGSTOP << 8) | 0x7f)); test_assert(1 == write(pipe_fds[1], "x", 1)); test_assert(0 == ptrace(PTRACE_CONT, child, NULL, (void*)0)); test_assert(child == waitpid(child, &status, 0)); test_assert(status == ((SIGSEGV << 8) | 0x7f)); /* Check that the child actually executed forwards to the SIGSEGV */ test_assert(p[0] == 77); test_assert(p[1] == 0); /* Progress to second (fatal) SIGSEGV */ test_assert(0 == ptrace(PTRACE_CONT, child, NULL, (void*)SIGSEGV)); test_assert(child == waitpid(child, &status, 0)); test_assert(status == ((SIGSEGV << 8) | 0x7f)); test_assert(p[0] == 77); /* Check that code actually ran */ test_assert(p[1] == 78); /* Continue with the signal again. This should be fatal. 
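The handler reset the disposition to SIG_DFL after the first fault, so re-injecting SIGSEGV via PTRACE_CONT terminates the child this time; the WIFSIGNALED/WTERMSIG checks below verify exactly that.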
*/ test_assert(0 == ptrace(PTRACE_CONT, child, NULL, (void*)SIGSEGV)); test_assert(child == waitpid(child, &status, 0)); test_assert(WIFSIGNALED(status)); test_assert(WTERMSIG(status) == SIGSEGV); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/ptracer_death.c000066400000000000000000000023731265436462100166740ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int status_pipe[2]; static int ptracer(void) { pid_t child; int status; struct timespec ts = { 0, 50000000 }; int ready_pipe[2]; char ready = 'R'; test_assert(0 == pipe(ready_pipe)); if (0 == (child = fork())) { char ch = 0; char ok = 'K'; test_assert(1 == read(ready_pipe[0], &ch, 1)); test_assert(ch == 'R'); test_assert(1 == write(status_pipe[1], &ok, 1)); return 77; } nanosleep(&ts, NULL); test_assert(0 == ptrace(PTRACE_ATTACH, child, NULL, NULL)); test_assert(child == waitpid(child, &status, 0)); test_assert(status == ((SIGSTOP << 8) | 0x7f)); test_assert(1 == write(ready_pipe[1], &ready, 1)); /* Now just exit, and the child should resume */ return 44; } int main(int argc, char* argv[]) { char ch = 0; pid_t ptracer_pid; int status; test_assert(0 == pipe(status_pipe)); if (0 == (ptracer_pid = fork())) { return ptracer(); } test_assert(ptracer_pid == waitpid(ptracer_pid, &status, 0)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 44); test_assert(1 == read(status_pipe[0], &ch, 1)); test_assert(ch == 'K'); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/ptracer_death_multithread.c000066400000000000000000000053021265436462100212710ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" /* This test spawns a ptracer and a ptracee, where the ptracee has 10 sub-threads. The ptracer attaches to all the ptracee's threads, then exits. We check that all ptracee threads are resumed.
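Each attach is verified against the raw wait status ((SIGSTOP << 8) | 0x7f), which is what WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP decode to, before the ptracer exits without detaching.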
*/ static int status_pipe[2]; static int tid_pipe[2]; static int ready_pipe[2]; static int thread_wait_pipe[2]; static void write_tid(void) { pid_t tid = sys_gettid(); test_assert(sizeof(tid) == write(tid_pipe[1], &tid, sizeof(tid))); } static pid_t read_tid(void) { pid_t tid; test_assert(sizeof(tid) == read(tid_pipe[0], &tid, sizeof(tid))); return tid; } static void* child_thread(void* p) { char ch = 0; write_tid(); test_assert(1 == read(thread_wait_pipe[0], &ch, 1)); test_assert('W' == ch); return NULL; } static int child_runner(void) { char ch = 0; pthread_t threads[10]; int i; write_tid(); for (i = 0; i < 10; ++i) { pthread_create(&threads[i], NULL, child_thread, NULL); } atomic_printf("Waiting on ready_pipe\n"); test_assert(1 == read(ready_pipe[0], &ch, 1)); test_assert(ch == 'R'); for (i = 0; i < 10; ++i) { char ch2 = 'W'; test_assert(1 == write(thread_wait_pipe[1], &ch2, 1)); } for (i = 0; i < 10; ++i) { atomic_printf("Joining thread %d\n", i); pthread_join(threads[i], NULL); } char ok = 'K'; test_assert(1 == write(status_pipe[1], &ok, 1)); return 77; } static int ptracer(void) { pid_t child; int status; char ready = 'R'; int i; pid_t child_tids[11]; if (0 == (child = fork())) { return child_runner(); } for (i = 0; i < 11; ++i) { child_tids[i] = read_tid(); } for (i = 0; i < 11; ++i) { int ret; test_assert(0 == ptrace(PTRACE_ATTACH, child_tids[i], NULL, NULL)); ret = waitpid(child_tids[i], &status, __WALL); atomic_printf("waitpid on %d gives %d with errno=%d\n", child_tids[i], ret, errno); test_assert(ret == child_tids[i]); test_assert(status == ((SIGSTOP << 8) | 0x7f)); } test_assert(1 == write(ready_pipe[1], &ready, 1)); /* Now just exit, and all child threads should resume */ return 44; } int main(int argc, char* argv[]) { char ch = 0; pid_t ptracer_pid; int status; test_assert(0 == pipe(ready_pipe)); test_assert(0 == pipe(tid_pipe)); test_assert(0 == pipe(status_pipe)); test_assert(0 == pipe(thread_wait_pipe)); if (0 == (ptracer_pid = fork())) { return ptracer(); } test_assert(ptracer_pid == waitpid(ptracer_pid, &status, 0)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 44); test_assert(1 == read(status_pipe[0], &ch, 1)); test_assert(ch == 'K'); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/ptracer_death_multithread_peer.c000066400000000000000000000056051265436462100223120ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" /* This test spawns a ptracer and a ptracee, where the ptracee has 10 sub-threads. The ptracer attaches to all the ptracee's threads, then exits. We check that all ptracee threads are resumed. Similar to ptracer_death_multithread, except the ptracer is not a parent of the ptracee.
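Because the ptracer is a sibling rather than an ancestor of the ptracee here, the ptracee calls prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY) up front so that Yama-style ptrace scoping, where enabled, still permits the attach.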
*/ static int status_pipe[2]; static int tid_pipe[2]; static int ready_pipe[2]; static int thread_wait_pipe[2]; static void write_tid(void) { pid_t tid = sys_gettid(); test_assert(sizeof(tid) == write(tid_pipe[1], &tid, sizeof(tid))); } static pid_t read_tid(void) { pid_t tid; test_assert(sizeof(tid) == read(tid_pipe[0], &tid, sizeof(tid))); return tid; } static void* child_thread(void* p) { char ch = 0; write_tid(); test_assert(1 == read(thread_wait_pipe[0], &ch, 1)); test_assert('W' == ch); return NULL; } static int child_runner(void) { char ch = 0; pthread_t threads[10]; int i; /* This fails on some kernels, so don't check its result */ prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY); write_tid(); for (i = 0; i < 10; ++i) { pthread_create(&threads[i], NULL, child_thread, NULL); } atomic_printf("Waiting on ready_pipe\n"); test_assert(1 == read(ready_pipe[0], &ch, 1)); test_assert(ch == 'R'); for (i = 0; i < 10; ++i) { char ch2 = 'W'; test_assert(1 == write(thread_wait_pipe[1], &ch2, 1)); } for (i = 0; i < 10; ++i) { atomic_printf("Joining thread %d\n", i); pthread_join(threads[i], NULL); } char ok = 'K'; test_assert(1 == write(status_pipe[1], &ok, 1)); return 77; } static int ptracer(void) { int status; char ready = 'R'; int i; pid_t child_tids[11]; for (i = 0; i < 11; ++i) { child_tids[i] = read_tid(); } for (i = 0; i < 11; ++i) { int ret; test_assert(0 == ptrace(PTRACE_ATTACH, child_tids[i], NULL, NULL)); ret = waitpid(child_tids[i], &status, __WALL); atomic_printf("waitpid on %d gives %d with errno=%d\n", child_tids[i], ret, errno); test_assert(ret == child_tids[i]); test_assert(status == ((SIGSTOP << 8) | 0x7f)); } test_assert(1 == write(ready_pipe[1], &ready, 1)); /* Now just exit, and all child threads should resume */ return 44; } int main(int argc, char* argv[]) { char ch = 0; pid_t ptracer_pid, child; int status; test_assert(0 == pipe(ready_pipe)); test_assert(0 == pipe(tid_pipe)); test_assert(0 == pipe(status_pipe)); test_assert(0 == pipe(thread_wait_pipe)); if (0 == (child = fork())) { return child_runner(); } if (0 == (ptracer_pid = fork())) { return ptracer(); } test_assert(ptracer_pid == waitpid(ptracer_pid, &status, 0)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 44); test_assert(1 == read(status_pipe[0], &ch, 1)); test_assert(ch == 'K'); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/quotactl.c000066400000000000000000000036241265436462100157230ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static char home_device[1001]; static void find_home_device(void) { struct stat home_stat; const char* home = getenv("HOME"); FILE* f; char mount_line[2000]; if (!home || stat(home, &home_stat)) { atomic_printf("Can't stat %s; aborting test\n", home); atomic_puts("EXIT-SUCCESS"); exit(0); } f = fopen("/proc/self/mountinfo", "rt"); test_assert(f != NULL); while (fgets(mount_line, sizeof(mount_line), f)) { int maj, min; int ret; ret = sscanf(mount_line, "%*d %*d %d:%d %*s %*s %*s %*s - %*s %1000s %*s", &maj, &min, home_device); // optional field (7) missing? 
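/* /proc/self/mountinfo may carry an optional tagged field (e.g. shared:N) between the mount options and the '-' separator; the pattern above skips one such field, so if it fails to match all three values, retry with one fewer skipped field. */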
if (ret != 3) { sscanf(mount_line, "%*d %*d %d:%d %*s %*s %*s - %*s %1000s %*s", &maj, &min, home_device); } if (maj == major(home_stat.st_dev) && min == minor(home_stat.st_dev)) { atomic_printf("%s (%d:%d) is on device special file %s\n", home, maj, min, home_device); return; } } atomic_printf("Can't find filesystem containing %s (%d:%d); aborting test\n", home, major(home_stat.st_dev), minor(home_stat.st_dev)); atomic_puts("EXIT-SUCCESS"); exit(0); } int main(int argc, char* argv[]) { struct dqblk dq; int ret; find_home_device(); ret = quotactl(QCMD(Q_GETQUOTA, USRQUOTA), home_device, getuid(), (caddr_t)&dq); if (ret < 0 && errno == ENOSYS) { atomic_puts("Quotas not supported in this kernel; aborting test"); } else if (ret < 0 && errno == ESRCH) { atomic_puts("Quotas not enabled on this file system; aborting test"); } else if (ret < 0 && errno == ENOTBLK) { atomic_puts("Home directory device is not a block device; aborting test"); } else { test_assert(0 == ret); atomic_printf("QIF bits=%x\n", dq.dqb_valid); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/rdtsc.c000066400000000000000000000010651265436462100152030ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) { int break_here = 1; (void)break_here; } int main(int argc, char* argv[]) { int i; uint64_t last_tsc = 0; for (i = 0; i < 100; ++i) { uint64_t tsc; breakpoint(); /* NO SYSCALLS BETWEEN HERE AND RDTSC: next event for * replay must be rdtsc */ tsc = rdtsc(); test_assert(last_tsc < tsc); atomic_printf("%" PRIu64 ",", tsc); last_tsc = tsc; } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/read_bad_mem.py000066400000000000000000000004241265436462100166470ustar00rootroot00000000000000from rrutil import * send_gdb('b main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1, main') send_gdb('p *0xf') expect_gdb('Cannot access memory at address 0xf') send_gdb('p *0xffffffff') expect_gdb('Cannot access memory at address 0xffffffff') ok() rr-4.1.0/src/test/read_bad_mem.run000066400000000000000000000001051265436462100170170ustar00rootroot00000000000000source `dirname $0`/util.sh record simple$bitness debug read_bad_mem rr-4.1.0/src/test/read_big_struct.c000066400000000000000000000007501265436462100172240ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) { int break_here = 1; (void)break_here; } /* FIXME: we should be able to send arbitrarily large structs over the * debugging socket. This is a temporary hack. 
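Printing the 8192-byte struct with a single 'p big' forces a large memory transfer through the gdb remote protocol; the companion read_big_struct.py only checks that every byte comes back as 'Z', the 0x5a fill pattern below.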
*/ struct big { char bytes[8192]; }; int main(int argc, char* argv[]) { struct big big; memset(&big, 0x5a, sizeof(big)); breakpoint(); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/read_big_struct.py000066400000000000000000000003271265436462100174320ustar00rootroot00000000000000from rrutil import * send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1, breakpoint') send_gdb('f 1') expect_gdb('(rr)') send_gdb('p big') expect_gdb("bytes = 'Z'") ok() rr-4.1.0/src/test/read_big_struct.run000066400000000000000000000000471265436462100176050ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/read_nothing.c000066400000000000000000000010501265436462100165210ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define SIZE 100000000 int main(int argc, char* argv[]) { int pipe_fds[2]; int i; char* buf = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); test_assert(buf != MAP_FAILED); test_assert(0 == pipe2(pipe_fds, O_NONBLOCK)); for (i = 0; i < 10000; ++i) { test_assert(-1 == read(pipe_fds[0], buf, SIZE)); test_assert(errno == EAGAIN); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/readdir.c000066400000000000000000000007551265436462100155010ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { DIR* dir = opendir("."); struct dirent* ent; test_assert(dir != NULL); while ((ent = readdir(dir)) != NULL) { test_assert(ent->d_reclen >= 8); atomic_printf("%s %lld %lld\n", ent->d_name, (long long)ent->d_ino, (long long)ent->d_off); } test_assert(0 == closedir(dir)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/readlink.c000066400000000000000000000017401265436462100156550ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define BUF_SIZE 10 #define BUF2_SIZE 1000 int main(int argc, char* argv[]) { char path[] = "rr-test-file-XXXXXX"; int fd = mkstemp(path); int count; char link[PATH_MAX]; char* buf = allocate_guard(BUF_SIZE, 'q'); char* buf2 = allocate_guard(BUF2_SIZE, 'r'); test_assert(0 <= fd); for (count = 0;; ++count) { sprintf(link, "rr-test-link-%d", count); int ret = symlink(path, link); if (ret == 0) { break; } test_assert(errno == EEXIST); } test_assert(BUF_SIZE == readlink(link, buf, BUF_SIZE)); test_assert(0 == memcmp(path, buf, BUF_SIZE)); verify_guard(BUF_SIZE, buf); test_assert(strlen(path) == readlink(link, buf2, BUF2_SIZE)); test_assert(0 == memcmp(path, buf2, strlen(path))); verify_guard(BUF2_SIZE, buf2); test_assert(0 == unlink(path)); test_assert(0 == unlink(link)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/readlinkat.c000066400000000000000000000020631265436462100162010ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define BUF_SIZE 10 #define BUF2_SIZE 1000 int main(int argc, char* argv[]) { char path[] = "rr-test-file-XXXXXX"; char dpath[] = "rr-test-dir-XXXXXX"; const char* dir_path = mkdtemp(dpath); int count; char link[PATH_MAX]; char* buf = allocate_guard(BUF_SIZE, 'q'); char* buf2 = allocate_guard(BUF2_SIZE, 'r'); test_assert(NULL != dir_path); chdir(dir_path); for (count = 0;; ++count) { sprintf(link, "rr-test-link-%d", count); int ret = symlink(path, link); if (ret == 0) { break; }
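/* EEXIST just means a link left over from a previous run; retry with the next numeric suffix. */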
test_assert(errno == EEXIST); } int ret = readlinkat(AT_FDCWD, link, buf, BUF_SIZE); test_assert(BUF_SIZE == ret); test_assert(0 == memcmp(path, buf, BUF_SIZE)); verify_guard(BUF_SIZE, buf); test_assert(strlen(path) == readlinkat(AT_FDCWD, link, buf2, BUF2_SIZE)); test_assert(0 == memcmp(path, buf2, strlen(path))); verify_guard(BUF2_SIZE, buf2); test_assert(0 == unlink(link)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/readv.c000066400000000000000000000024311265436462100151630ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static char data[10] = "0123456789"; static void test(int use_preadv) { char name[] = "/tmp/rr-readv-XXXXXX"; int fd = mkstemp(name); struct { char ch[7]; } * part1; struct { char ch[10]; } * part2; struct iovec iovs[2]; ssize_t nread; test_assert(fd >= 0); test_assert(0 == unlink(name)); test_assert(sizeof(data) == write(fd, data, sizeof(data))); ALLOCATE_GUARD(part1, 'x'); ALLOCATE_GUARD(part2, 'y'); iovs[0].iov_base = part1; iovs[0].iov_len = sizeof(*part1); iovs[1].iov_base = part2; iovs[1].iov_len = sizeof(*part2); if (use_preadv) { /* Work around busted preadv prototype in older libcs */ nread = syscall(SYS_preadv, fd, iovs, 2, 0, 0); } else { test_assert(0 == lseek(fd, 0, SEEK_SET)); nread = readv(fd, iovs, 2); } test_assert(sizeof(data) == nread); test_assert(0 == memcmp(part1, data, sizeof(*part1))); test_assert( 0 == memcmp(part2, data + sizeof(*part1), sizeof(data) - sizeof(*part1))); test_assert(part2->ch[sizeof(data) - sizeof(*part1)] == 'y'); VERIFY_GUARD(part1); VERIFY_GUARD(part2); } int main(int argc, char* argv[]) { test(0); test(1); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/remove_watchpoint.py000066400000000000000000000010761265436462100200310ustar00rootroot00000000000000from rrutil import * send_gdb('break main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('watch -l var') expect_gdb('Hardware[()/a-z ]+watchpoint 2') send_gdb('c') expect_gdb('Old value = 0') expect_gdb('New value = 42') send_gdb('p atomic_printf("hello%s", "kitty")') expect_gdb('hellokitty') send_gdb('delete 2') send_gdb('break pthread_join') expect_gdb('Breakpoint 3') send_gdb('c') expect_gdb('Breakpoint 3') send_gdb('p atomic_printf("hello%s", "kitty")') expect_gdb('hellokitty') send_gdb('c') expect_gdb('EXIT-SUCCESS') ok() rr-4.1.0/src/test/remove_watchpoint.run000066400000000000000000000001161265436462100201770ustar00rootroot00000000000000source `dirname $0`/util.sh record watchpoint$bitness debug remove_watchpoint rr-4.1.0/src/test/restart_abnormal_exit.c000066400000000000000000000003201265436462100204450ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { atomic_puts("EXIT-SUCCESS"); kill(getppid(), SIGINT); return 0; } rr-4.1.0/src/test/restart_abnormal_exit.py000066400000000000000000000006561265436462100206670ustar00rootroot00000000000000from rrutil import * send_gdb('c') expect_rr('EXIT-SUCCESS') expect_gdb('exited') send_gdb('b main') expect_gdb('Breakpoint 1') restart_replay() expect_gdb('Breakpoint 1') send_gdb('checkpoint') expect_gdb('= 1') send_gdb('c') expect_rr('EXIT-SUCCESS') expect_gdb('exited') restart_replay() expect_gdb('Breakpoint 1') send_gdb('checkpoint') expect_gdb('= 2') send_gdb('c') expect_rr('EXIT-SUCCESS') expect_gdb('exited') ok() 
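# The recorded program sends SIGINT to its parent as it exits (see restart_abnormal_exit.c above), so the restarts in this script exercise reviving a session whose previous replay ended abnormally.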
rr-4.1.0/src/test/restart_abnormal_exit.run000066400000000000000000000000471265436462100210350ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/restart_breakpoint.py000066400000000000000000000005431265436462100201740ustar00rootroot00000000000000from rrutil import * send_gdb('b C') expect_gdb('Breakpoint 1') def check_breakpoint(): expect_rr('calling C') expect_gdb('Breakpoint 1, C') send_gdb('bt') expect_gdb('#0[^C]+C[^#]+#1[^B]+B[^#]+#2[^A]+A[^#]+#3[^m]+main') send_gdb('c') check_breakpoint() restart_replay() check_breakpoint() restart_replay() check_breakpoint() ok() rr-4.1.0/src/test/restart_diversion.py000066400000000000000000000006571265436462100200460ustar00rootroot00000000000000from rrutil import * send_gdb('b main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('check') expect_gdb('= 1') send_gdb('p atomic_printf("hello%s", "kitty")') expect_gdb('hellokitty') restart_replay() expect_gdb('Breakpoint 1') send_gdb('p atomic_printf("hello%s", "kitty")') expect_gdb('hellokitty') send_gdb('restart 1') expect_gdb('stopped') send_gdb('c') expect_rr('EXIT-SUCCESS') ok() rr-4.1.0/src/test/restart_diversion.run000066400000000000000000000001121265436462100202040ustar00rootroot00000000000000source `dirname $0`/util.sh record simple$bitness debug restart_diversion rr-4.1.0/src/test/restart_finish.py000066400000000000000000000002721265436462100173150ustar00rootroot00000000000000from rrutil import * import re restart_replay() send_gdb('c') # A stop fires when we hit an exec expect_rr([ re.compile(r'exited normally'), re.compile(r'stopped') ]) ok() rr-4.1.0/src/test/restart_invalid_checkpoint.py000066400000000000000000000010651265436462100216730ustar00rootroot00000000000000from rrutil import * send_gdb('b main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('b atomic_puts') expect_gdb('Breakpoint 2') send_gdb('c') expect_gdb('Breakpoint 2') send_gdb('checkpoint') expect_gdb('= 1') send_gdb('restart 8') send_gdb('restart -1') send_gdb('restart abc') send_gdb('restart 1') expect_gdb('stopped') send_gdb('c') # If rr crashes, a 'restart' will re-run the program directly under gdb from # the beginning. If that happens, we'll stop at breakpoint 1, not exit normally. 
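# Matching 'xited normally' rather than 'exited normally' presumably tolerates either capitalization in gdb's exit report.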
expect_gdb('xited normally') ok() rr-4.1.0/src/test/restart_invalid_checkpoint.run000066400000000000000000000001241265436462100220420ustar00rootroot00000000000000source `dirname $0`/util.sh record simple$bitness debug restart_invalid_checkpoint rr-4.1.0/src/test/restart_unstable.py000066400000000000000000000001711265436462100176500ustar00rootroot00000000000000from rrutil import * send_gdb('c') expect_gdb('exited normally') restart_replay() expect_gdb('exited normally') ok() rr-4.1.0/src/test/restart_unstable.run000066400000000000000000000001151265436462100200220ustar00rootroot00000000000000source `dirname $0`/util.sh record exit_group$bitness debug restart_unstable rr-4.1.0/src/test/reverse_alarm.py000066400000000000000000000004221265436462100171150ustar00rootroot00000000000000from rrutil import * send_gdb('break main') expect_gdb('Breakpoint 1') send_gdb('break breakpoint') expect_gdb('Breakpoint 2') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 2') send_gdb('reverse-continue') expect_gdb('Breakpoint 1') ok() rr-4.1.0/src/test/reverse_alarm.run000066400000000000000000000001051265436462100172670ustar00rootroot00000000000000source `dirname $0`/util.sh record alarm$bitness debug reverse_alarm rr-4.1.0/src/test/reverse_continue_breakpoint.c000066400000000000000000000014601265436462100216600ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static volatile int k = 2; static void breakpoint(void) {} static void breakpoint2(void) {} int main(int argc, char* argv[]) { int i; int j; int fd = open("/dev/null", O_WRONLY); test_assert(fd >= 0); /* We need to test reverse-continue through a bunch of breakpoints when the next event is a syscallbuf-flush. So we need the next event to be syscall-buffered. Do a write here so that the syscall is patched now, not later. 
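rr's preload library patches a buffered syscall site the first time it executes, so issuing one write up front keeps that patching event out of the region the test later reverse-continues across.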
*/ test_assert(1 == write(fd, ".", 1)); for (i = 0; i < 2000000; ++i) { breakpoint2(); for (j = 0; j < 10; ++j) { k = k * 37; } } breakpoint(); test_assert(1 == write(fd, ".", 1)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/reverse_continue_breakpoint.py000066400000000000000000000003531265436462100220660ustar00rootroot00000000000000from rrutil import * send_gdb('break breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('break breakpoint2') expect_gdb('Breakpoint 2') send_gdb('reverse-cont') expect_gdb('Breakpoint 2') ok() rr-4.1.0/src/test/reverse_continue_breakpoint.run000066400000000000000000000000471265436462100222420ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/reverse_continue_exec_subprocess.py000066400000000000000000000007701265436462100231270ustar00rootroot00000000000000from rrutil import * import re send_gdb('reverse-cont') expect_gdb('stopped') send_gdb('reverse-cont') expect_gdb('stopped') send_gdb('reverse-stepi') expect_gdb('stopped') send_gdb('reverse-cont') expect_gdb('stopped') send_gdb('b main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('reverse-cont') expect_gdb('stopped') send_gdb('reverse-cont') expect_gdb('stopped') send_gdb('reverse-stepi') expect_gdb('stopped') send_gdb('reverse-cont') expect_gdb('stopped') ok() rr-4.1.0/src/test/reverse_continue_exec_subprocess.run000066400000000000000000000005121265436462100232750ustar00rootroot00000000000000source `dirname $0`/util.sh save_exe simple$bitness saved_simple="simple$bitness-$nonce" save_exe target_process$bitness record "target_process$bitness" "$saved_simple" TARGET_PID=$(grep 'child ' record.out | awk '{print $2}') echo Targeting recorded pid $TARGET_PID ... debug reverse_continue_exec_subprocess "-p $TARGET_PID" rr-4.1.0/src/test/reverse_continue_fork_subprocess.py000066400000000000000000000010771265436462100231450ustar00rootroot00000000000000from rrutil import * import re send_gdb('reverse-cont') expect_gdb('stopped') send_gdb('reverse-cont') expect_gdb('stopped') send_gdb('reverse-stepi') expect_gdb('stopped') send_gdb('reverse-cont') expect_gdb('stopped') send_gdb('stepi') send_gdb('reverse-cont') expect_gdb('stopped') send_gdb('reverse-cont') expect_gdb('stopped') send_gdb('reverse-stepi') expect_gdb('stopped') send_gdb('reverse-cont') expect_gdb('stopped') send_gdb('stepi') send_gdb('reverse-stepi') send_gdb('reverse-stepi') expect_gdb('stopped') send_gdb('reverse-cont') expect_gdb('stopped') ok() rr-4.1.0/src/test/reverse_continue_fork_subprocess.run000066400000000000000000000005121265436462100233120ustar00rootroot00000000000000source `dirname $0`/util.sh save_exe simple$bitness saved_simple="simple$bitness-$nonce" save_exe target_process$bitness record "target_process$bitness" "$saved_simple" TARGET_PID=$(grep 'child ' record.out | awk '{print $2}') echo Targeting recorded pid $TARGET_PID ... debug reverse_continue_fork_subprocess "-f $TARGET_PID" rr-4.1.0/src/test/reverse_continue_multiprocess.c000066400000000000000000000024171265436462100222560ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) {} static int parent_to_child[2]; static int child_to_parent[2]; int main(int argc, char* argv[]) { pid_t pid; int status; int i; char ch; test_assert(0 == pipe(parent_to_child)); test_assert(0 == pipe(child_to_parent)); /* Force ping-ponging between parent and child. 
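The two pipes make each side block until the other has run, so execution strictly alternates.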
At each iteration the child receives a signal. We debug the parent process; the signals being received by the child while reverse-executing the parent should be ignored at a low enough level they don't impact the performance of reverse-continue. */ breakpoint(); pid = fork(); if (0 == pid) { for (i = 0; i < 1000; ++i) { char ch; test_assert(1 == read(parent_to_child[0], &ch, 1) && ch == 'y'); kill(getpid(), SIGCHLD); test_assert(1 == write(child_to_parent[1], "x", 1)); } return 77; } for (i = 0; i < 1000; ++i) { test_assert(1 == write(parent_to_child[1], "y", 1)); test_assert(1 == read(child_to_parent[0], &ch, 1) && ch == 'x'); } test_assert(pid == wait(&status)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 77); breakpoint(); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/reverse_continue_multiprocess.py000066400000000000000000000003261265436462100224610ustar00rootroot00000000000000from rrutil import * import re send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('rc') expect_gdb('Breakpoint 1') ok() rr-4.1.0/src/test/reverse_continue_multiprocess.run000066400000000000000000000000471265436462100226350ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/reverse_continue_process_signal.c000066400000000000000000000012231265436462100225320ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define NUM_ITERATIONS (1 << 26) int spin(void) { int i, dummy = 0; atomic_puts("spinning"); for (i = 1; i < NUM_ITERATIONS; ++i) { dummy += i % (1 << 20); dummy += i % (79 * (1 << 20)); } return dummy; } int main(int argc, char* argv[]) { pid_t pid; int status; pid = fork(); if (0 == pid) { signal(SIGINT, SIG_IGN); spin(); kill(getpid(), SIGINT); return 77; } test_assert(pid == wait(&status)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 77); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/reverse_continue_process_signal.py000066400000000000000000000002241265436462100227400ustar00rootroot00000000000000from rrutil import * send_gdb('handle SIGKILL stop') send_gdb('c') expect_gdb('SIGKILL') send_gdb('reverse-continue') expect_gdb('stopped') ok() rr-4.1.0/src/test/reverse_continue_process_signal.run000066400000000000000000000000471265436462100231170ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/reverse_continue_start.py000066400000000000000000000006311265436462100210640ustar00rootroot00000000000000from rrutil import * import re send_gdb('reverse-cont') expect_gdb('stopped') send_gdb('reverse-cont') expect_gdb('stopped') send_gdb('reverse-stepi') expect_gdb('stopped') send_gdb('b main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('reverse-cont') expect_gdb('stopped') send_gdb('reverse-stepi') expect_gdb('stopped') send_gdb('reverse-cont') expect_gdb('stopped') ok() rr-4.1.0/src/test/reverse_continue_start.run000066400000000000000000000001171265436462100212370ustar00rootroot00000000000000source `dirname $0`/util.sh record simple$bitness debug reverse_continue_start rr-4.1.0/src/test/reverse_finish.py000066400000000000000000000002561265436462100173060ustar00rootroot00000000000000from rrutil import * send_gdb('break atomic_puts') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('reverse-finish') expect_gdb('main') ok() 
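# reverse-finish from the breakpoint inside atomic_puts should land back at its call site in main.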
rr-4.1.0/src/test/reverse_finish.run000066400000000000000000000001071265436462100174550ustar00rootroot00000000000000source `dirname $0`/util.sh record simple$bitness debug reverse_finish rr-4.1.0/src/test/reverse_many_breakpoints.c000066400000000000000000000004761265436462100211710ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) {} int main(int argc, char* argv[]) { int i; int result = 0; for (i = 0; i < 100000; ++i) { result += i * i; breakpoint(); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/reverse_many_breakpoints.py000066400000000000000000000003211265436462100213640ustar00rootroot00000000000000from rrutil import * send_gdb('handle SIGKILL stop') send_gdb('c') expect_gdb('SIGKILL') send_gdb('break breakpoint') expect_gdb('Breakpoint 1') send_gdb('reverse-continue') expect_gdb('Breakpoint 1') ok() rr-4.1.0/src/test/reverse_many_breakpoints.run000066400000000000000000000000471265436462100215450ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/reverse_step_breakpoint.py000066400000000000000000000004221265436462100212120ustar00rootroot00000000000000from rrutil import * send_gdb('b main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('n') send_gdb('break') expect_gdb('Breakpoint 2') send_gdb('reverse-next') expect_gdb('Breakpoint 1') send_gdb('next') expect_gdb('Breakpoint 2') ok() rr-4.1.0/src/test/reverse_step_breakpoint.run000066400000000000000000000001201265436462100213610ustar00rootroot00000000000000source `dirname $0`/util.sh record simple$bitness debug reverse_step_breakpoint rr-4.1.0/src/test/reverse_step_long.c000066400000000000000000000016251265436462100176130ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define NUM_ITERATIONS (1 << 27) static void breakpoint(void) { int break_here = 1; (void)break_here; } static int spin(void) { int i, dummy = 0; atomic_puts("spinning"); for (i = 1; i < 1 << 28; ++i) { dummy += i % (1 << 20); dummy += i % (79 * (1 << 20)); } return dummy; } /** * We'll break in do_thread, continue until SIGKILL, and * then try a reverse-stepi. This will have to search back through * several checkpoints to find the last completed singlestep for * the thread. 
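The spinning main thread gives the recording enough ticks for rr to lay down multiple checkpoints while do_thread stays blocked in read(), so the reverse search genuinely has to walk back across them.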
*/ static void* do_thread(void* p) { char ch; breakpoint(); /* Will never return */ read(STDIN_FILENO, &ch, 1); return NULL; } int main(int argc, char* argv[]) { pthread_t thread; pthread_create(&thread, NULL, do_thread, NULL); spin(); atomic_printf("EXIT-SUCCESS\n"); return 0; } rr-4.1.0/src/test/reverse_step_long.py000066400000000000000000000004531265436462100200170ustar00rootroot00000000000000from rrutil import * send_gdb('handle SIGKILL stop') send_gdb('break breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('SIGKILL') # This should not hang send_gdb('reverse-stepi') send_gdb('reverse-continue') expect_gdb('Breakpoint 1') ok() rr-4.1.0/src/test/reverse_step_long.run000066400000000000000000000000471265436462100201720ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/reverse_step_signal.py000066400000000000000000000010361265436462100203330ustar00rootroot00000000000000from rrutil import * import re send_gdb('watch num_signals_caught') expect_gdb('Hardware watchpoint 1') send_gdb('c') expect_gdb('received signal') send_gdb('c') expect_gdb('Hardware watchpoint 1') expect_gdb('Old value = 0') expect_gdb('New value = 1') send_gdb('reverse-continue') expect_gdb('Hardware watchpoint 1') expect_gdb('Old value = 1') expect_gdb('New value = 0') send_gdb('reverse-finish') expect_gdb('raise') send_gdb('reverse-stepi') expect_gdb('received signal') send_gdb('reverse-continue') expect_gdb('stopped') ok() rr-4.1.0/src/test/reverse_step_signal.run000066400000000000000000000001131265436462100205020ustar00rootroot00000000000000source `dirname $0`/util.sh record sigrt$bitness debug reverse_step_signal rr-4.1.0/src/test/reverse_step_threads.c000066400000000000000000000013251265436462100203030ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define NUM_ITERATIONS (1 << 27) static void breakpoint(void) { int break_here = 1; (void)break_here; } int spin(int iterations) { int i, dummy = 0; atomic_puts("spinning"); for (i = 1; i < iterations; ++i) { dummy += i % (1 << 20); dummy += i % (79 * (1 << 20)); } return dummy; } static void* do_thread(void* p) { breakpoint(); return NULL; } int main(int argc, char* argv[]) { int s = spin(NUM_ITERATIONS); pthread_t thread; pthread_create(&thread, NULL, do_thread, NULL); pthread_join(thread, NULL); s = spin(1000); atomic_printf("EXIT-SUCCESS dummy=%d\n", s); return 0; } rr-4.1.0/src/test/reverse_step_threads.py000066400000000000000000000003431265436462100205100ustar00rootroot00000000000000from rrutil import * send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('set scheduler-locking off') send_gdb('reverse-step') send_gdb('c') expect_gdb('Breakpoint 1') ok() rr-4.1.0/src/test/reverse_step_threads.run000066400000000000000000000000471265436462100206650ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/reverse_step_threads2.py000066400000000000000000000004501265436462100205710ustar00rootroot00000000000000from rrutil import * send_gdb('b spin') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('finish') send_gdb('set scheduler-locking off') send_gdb('reverse-step') send_gdb('reverse-continue') expect_gdb('Breakpoint 1') ok() rr-4.1.0/src/test/reverse_step_threads2.run000066400000000000000000000001341265436462100207440ustar00rootroot00000000000000source 
`dirname $0`/util.sh record reverse_step_threads$bitness debug reverse_step_threads2 rr-4.1.0/src/test/reverse_step_threads_break.c000066400000000000000000000024371265436462100214540ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int main_to_thread_fds[2]; static int thread_to_main_fds[2]; static void breakpoint(void) {} static void breakpoint_thread(void) {} static size_t my_read(int fd, void* buf, size_t size) { size_t ret; #ifdef __x86_64__ __asm__("syscall\n\t" : "=a"(ret) : "a"(SYS_read), "D"(fd), "S"(buf), "d"(size)); #elif defined(__i386__) __asm__("xchg %%ebx,%%edi\n\t" "int $0x80\n\t" "xchg %%ebx,%%edi\n\t" : "=a"(ret) : "a"(SYS_read), "c"(buf), "d"(size), "D"(fd)); #else #error define syscall here #endif return ret; } static void* do_thread(void* p) { char ch; breakpoint_thread(); test_assert(1 == write(thread_to_main_fds[1], "y", 1)); test_assert(1 == read(main_to_thread_fds[0], &ch, 1)); return NULL; } int main(int argc, char* argv[]) { pthread_t thread; char ch; test_assert(0 == pipe(thread_to_main_fds)); test_assert(0 == pipe(main_to_thread_fds)); pthread_create(&thread, NULL, do_thread, NULL); test_assert(1 == my_read(thread_to_main_fds[0], &ch, 1)); breakpoint(); test_assert(1 == write(main_to_thread_fds[1], "x", 1)); pthread_join(thread, NULL); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/reverse_step_threads_break.py000066400000000000000000000004501265436462100216530ustar00rootroot00000000000000from rrutil import * send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('b breakpoint_thread') expect_gdb('Breakpoint 2') send_gdb('set scheduler-locking off') for i in xrange(20): send_gdb('reverse-step') expect_gdb('Breakpoint 2') ok() rr-4.1.0/src/test/reverse_step_threads_break.run000066400000000000000000000000471265436462100220310ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/reverse_watchpoint.py000066400000000000000000000012651265436462100202070ustar00rootroot00000000000000from rrutil import * send_gdb('break atomic_puts') expect_gdb('Breakpoint 1') send_gdb('watch var') expect_gdb('Hardware watchpoint 2') send_gdb('c') expect_gdb('Hardware watchpoint 2') expect_gdb('Old value = 0') expect_gdb('New value = 42') send_gdb('c') expect_gdb('Hardware watchpoint 2') expect_gdb('Old value = 42') expect_gdb('New value = 1337') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('reverse-cont') expect_gdb('Hardware watchpoint 2') expect_gdb('Old value = 1337') expect_gdb('New value = 42') send_gdb('reverse-cont') expect_gdb('Hardware watchpoint 2') expect_gdb('Old value = 42') expect_gdb('New value = 0') send_gdb('reverse-cont') expect_gdb('stopped') ok() rr-4.1.0/src/test/reverse_watchpoint.run000066400000000000000000000001171265436462100203560ustar00rootroot00000000000000source `dirname $0`/util.sh record watchpoint$bitness debug reverse_watchpoint rr-4.1.0/src/test/reverse_watchpoint_syscall.py000066400000000000000000000012011265436462100217270ustar00rootroot00000000000000from rrutil import * send_gdb('break atomic_puts') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('watch -l *p') expect_gdb('Hardware[()/a-z ]+watchpoint 2') send_gdb('reverse-continue') expect_gdb('Old value = ') expect_gdb('New value = 0') send_gdb('reverse-continue') expect_gdb('Old value = 0') expect_gdb('New value = 98') send_gdb('reverse-continue') expect_gdb('Old value = 98') expect_gdb('New 
value = 0') send_gdb('reverse-continue') expect_gdb('Old value = 0') expect_gdb('New value = 97') send_gdb('reverse-continue') expect_gdb('Old value = 97') expect_gdb('New value = 0') ok() rr-4.1.0/src/test/reverse_watchpoint_syscall.run000066400000000000000000000001371265436462100221120ustar00rootroot00000000000000source `dirname $0`/util.sh record watchpoint_syscall$bitness debug reverse_watchpoint_syscall rr-4.1.0/src/test/rlimit.c000066400000000000000000000013521265436462100153630ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { struct rlimit* r; struct rlimit* r2; ALLOCATE_GUARD(r, 0); ALLOCATE_GUARD(r2, 'x'); test_assert(0 == getrlimit(RLIMIT_FSIZE, r)); test_assert(r->rlim_cur > 0); test_assert(r->rlim_max > 0); VERIFY_GUARD(r); r->rlim_cur /= 2; test_assert(0 == setrlimit(RLIMIT_FSIZE, r)); VERIFY_GUARD(r); test_assert(0 == getrlimit(RLIMIT_FSIZE, r2)); test_assert(r2->rlim_cur == r->rlim_cur); VERIFY_GUARD(r2); test_assert(0 == prlimit(0, RLIMIT_FSIZE, r, r2)); test_assert(r2->rlim_cur == r->rlim_cur); VERIFY_GUARD(r); VERIFY_GUARD(r2); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/robust_futex.c000066400000000000000000000016051265436462100166150ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static pthread_mutex_t mutex; static void* run_thread(void* arg) { void* p; size_t len; syscall(SYS_get_robust_list, 0, &p, &len); atomic_printf("robust_list = %p, len = %d\n", p, (int)len); test_assert(0 == pthread_mutex_lock(&mutex)); return NULL; } int main(int argc, char* argv[]) { pthread_mutexattr_t attr; pthread_t thread; pthread_mutexattr_init(&attr); pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST); pthread_mutex_init(&mutex, &attr); pthread_create(&thread, NULL, run_thread, NULL); pthread_join(thread, NULL); test_assert(EOWNERDEAD == pthread_mutex_lock(&mutex)); pthread_mutex_consistent(&mutex); test_assert(0 == pthread_mutex_unlock(&mutex)); test_assert(0 == pthread_mutex_lock(&mutex)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/rrutil.h000066400000000000000000000116761265436462100154230ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RRUTIL_H #define RRUTIL_H #define _GNU_SOURCE 1 #define _POSIX_C_SOURCE 2 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__i386__) #include "SyscallEnumsForTestsX86.generated" #elif defined(__x86_64__) #include "SyscallEnumsForTestsX64.generated" #else #error Unknown architecture #endif #include typedef unsigned char uint8_t; #define ALEN(_a) (sizeof(_a) / sizeof(_a[0])) /** * Print the printf-like arguments to stdout as atomic-ly as we can * manage. Async-signal-safe. Does not flush stdio buffers (doing so * isn't signal safe). 
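Formatting into a local buffer and issuing one write() keeps each message contiguous even when several threads or processes log at once, which the test harness relies on when pattern-matching output.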
*/ __attribute__((format(printf, 1, 2))) inline static int atomic_printf( const char* fmt, ...) { va_list args; char buf[1024]; int len; va_start(args, fmt); len = vsnprintf(buf, sizeof(buf) - 1, fmt, args); va_end(args); return write(STDOUT_FILENO, buf, len); } /** * Write |str| on its own line to stdout as atomic-ly as we can * manage. Async-signal-safe. Does not flush stdio buffers (doing so * isn't signal safe). */ inline static int atomic_puts(const char* str) { return atomic_printf("%s\n", str); } #define fprintf(...) USE_dont_write_stderr #define printf(...) USE_atomic_printf_INSTEAD #define puts(...) USE_atomic_puts_INSTEAD inline static int check_cond(int cond) { if (!cond) { atomic_printf("FAILED: errno=%d (%s)\n", errno, strerror(errno)); } return cond; } #define test_assert(cond) assert("FAILED: !" && check_cond(cond)) /** * Return the calling task's id. */ inline static pid_t sys_gettid(void) { return syscall(SYS_gettid); } /** * Ensure that |len| bytes of |buf| are the same across recording and * replay. */ inline static void check_data(void* buf, size_t len) { syscall(SYS_write, RR_MAGIC_SAVE_DATA_FD, buf, len); atomic_printf("Wrote %zu bytes to magic fd\n", len); } /** * Return the current value of the time-stamp counter. */ inline static uint64_t rdtsc(void) { return __rdtsc(); } static uint64_t GUARD_VALUE = 0xdeadbeeff00dbaad; /** * Allocate 'size' bytes, fill with 'value', and place canary values before * and after the allocated block. */ inline static void* allocate_guard(size_t size, char value) { char* cp = (char*)malloc(size + 2 * sizeof(GUARD_VALUE)) + sizeof(GUARD_VALUE); memcpy(cp - sizeof(GUARD_VALUE), &GUARD_VALUE, sizeof(GUARD_VALUE)); memcpy(cp + size, &GUARD_VALUE, sizeof(GUARD_VALUE)); memset(cp, value, size); return cp; } /** * Verify that canary values before and after the block allocated at 'p' * (of size 'size') are still valid. */ inline static void verify_guard(size_t size, void* p) { char* cp = (char*)p; test_assert( memcmp(cp - sizeof(GUARD_VALUE), &GUARD_VALUE, sizeof(GUARD_VALUE)) == 0); test_assert(memcmp(cp + size, &GUARD_VALUE, sizeof(GUARD_VALUE)) == 0); } /** * Verify that canary values before and after the block allocated at 'p' * (of size 'size') are still valid, and free the block. 
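Typical use is ALLOCATE_GUARD(p, 'x'), then a syscall that writes *p, then VERIFY_GUARD(p); this catches any kernel-side (or replay-side) write that overruns the recorded buffer.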
*/ inline static void free_guard(size_t size, void* p) { verify_guard(size, p); free((char*)p - sizeof(GUARD_VALUE)); } #define ALLOCATE_GUARD(p, v) p = allocate_guard(sizeof(*p), v) #define VERIFY_GUARD(p) verify_guard(sizeof(*p), p) #define FREE_GUARD(p) free_guard(sizeof(*p), p) #ifndef SECCOMP_SET_MODE_STRICT #define SECCOMP_SET_MODE_STRICT 0 #endif #ifndef SECCOMP_SET_MODE_FILTER #define SECCOMP_SET_MODE_FILTER 1 #endif #endif /* RRUTIL_H */ rr-4.1.0/src/test/rrutil.py000066400000000000000000000047041265436462100156160ustar00rootroot00000000000000import pexpect, re, signal, sys, time __all__ = [ 'expect_gdb', 'send_gdb','expect_rr', 'expect_list', 'restart_replay', 'interrupt_gdb', 'ok', 'failed', 'iterlines_both', 'last_match', 'get_exe_arch' ] # Public API def expect_gdb(what): expect(gdb_rr, what) def expect_list(pats): return gdb_rr.expect_list(pats) def expect_rr(what): expect(gdb_rr, what) def failed(why, e=None): print 'FAILED:', why if e: print 'exception:', e clean_up() sys.exit(1) def interrupt_gdb(): try: gdb_rr.kill(signal.SIGINT) except Exception, e: failed('interrupting gdb', e) expect_gdb('stopped.') def iterlines_both(): return gdb_rr def last_match(): return gdb_rr.match def restart_replay(event=0): if event: send_gdb('r %d'%(event)) else: send_gdb('r') # gdb may not prompt here. It's ok to send an unnecessary 'y' # since there is no such command. send_gdb('y') send_gdb('c') def send_gdb(what): send(gdb_rr, "%s\n"%what) def ok(): send_gdb('q') send_gdb('y') clean_up() # Internal helpers TIMEOUT_SEC = 100 # gdb and rr are part of the same process tree, so they share # stdin/stdout. gdb_rr = None def clean_up(): global gdb_rr iterations = 0 while gdb_rr: try: gdb_rr.close(force=1) gdb_rr = None except Exception, e: if iterations < 5: print "close() failed with '%s', retrying..."%e iterations = iterations + 1 else: gdb_rr = None def expect(prog, what): try: prog.expect(what) except Exception, e: failed('expecting "%s"'% (what), e) def get_exe_arch(): send_gdb('show architecture') expect_gdb('The target architecture is set automatically \\(currently ([0-9a-z:-]+)\\)') global gdb_rr return gdb_rr.match.group(1) def get_rr_cmd(): '''Return the command that should be used to invoke rr, as the tuple (executable, array-of-args)''' rrargs = sys.argv[1:] return (rrargs[0], rrargs[1:]) def send(prog, what): try: prog.send(what) except Exception, e: failed('sending "%s"'% (what), e) def set_up(): global gdb_rr try: gdb_rr = pexpect.spawn(*get_rr_cmd(), timeout=TIMEOUT_SEC, logfile=open('gdb_rr.log', 'w')) expect_gdb(r'\(rr\)') except Exception, e: failed('initializing rr and gdb', e) set_up() rr-4.1.0/src/test/run_end.py000066400000000000000000000010571265436462100157250ustar00rootroot00000000000000from rrutil import * send_gdb('handle SIGKILL stop') # Test that invalid syntax doesn't crash rr send_gdb('run run 100') send_gdb('y') send_gdb('break main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') # Running with a big event number should reach the end of the recording send_gdb('run 10000') send_gdb('y') expect_gdb('SIGKILL') send_gdb('c') expect_gdb('xited normally') send_gdb('run 10000') # no need to confirm since the process already exited expect_gdb('SIGKILL') send_gdb('reverse-cont') expect_gdb('Breakpoint 1') ok() rr-4.1.0/src/test/run_end.run000066400000000000000000000001001265436462100160650ustar00rootroot00000000000000source `dirname $0`/util.sh record simple$bitness debug run_end 
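# run_end.py feeds 'run' both malformed and oversized event numbers; replay should reach the end of the recording (reported as SIGKILL) instead of crashing rr.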
rr-4.1.0/src/test/run_in_function.py000066400000000000000000000003171265436462100174700ustar00rootroot00000000000000from rrutil import * send_gdb('b main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('call crash()') expect_gdb('SIGSEGV') restart_replay() expect_gdb('Breakpoint 1') ok() rr-4.1.0/src/test/run_in_function.run000066400000000000000000000001231265436462100176370ustar00rootroot00000000000000source `dirname $0`/util.sh record crash_in_function$bitness debug run_in_function rr-4.1.0/src/test/rusage.c000066400000000000000000000005061265436462100153510ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { struct rusage* r; ALLOCATE_GUARD(r, 0); test_assert(0 == getrusage(RUSAGE_SELF, r)); test_assert(r->ru_maxrss > 0); VERIFY_GUARD(r); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sanity.py000066400000000000000000000000721265436462100155760ustar00rootroot00000000000000from rrutil import * send_gdb('c') expect_rr('Hi') ok() rr-4.1.0/src/test/sanity.run000066400000000000000000000000761265436462100157560ustar00rootroot00000000000000source `dirname $0`/util.sh record hello$bitness debug sanity rr-4.1.0/src/test/save_data_fd.c000066400000000000000000000007661265436462100164730ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define DEV_RANDOM "/dev/urandom" int main(int argc, char** argv) { int fd = open(DEV_RANDOM, O_RDONLY); char buf[128]; ssize_t nread; test_assert(0 <= fd); nread = read(fd, buf, sizeof(buf)); atomic_printf("Read %zd random bytes (expected %zu)\n", nread, sizeof(buf)); test_assert(nread == sizeof(buf)); check_data(buf, sizeof(buf)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sched_setaffinity.c000066400000000000000000000015311265436462100175550ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(void) { cpu_set_t* cpus; cpu_set_t* cpus_out; cpu_set_t* cpus_out_2; ALLOCATE_GUARD(cpus, 'x'); CPU_ZERO(cpus); CPU_SET(0, cpus); test_assert(0 == sched_setaffinity(0, sizeof(*cpus), cpus)); VERIFY_GUARD(cpus); ALLOCATE_GUARD(cpus_out, 'x'); test_assert(0 == sched_getaffinity(0, sizeof(*cpus_out), cpus_out)); /* We can't assert this because rr assigns us random affinity itself. 
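Instead, the code below only checks that two consecutive sched_getaffinity() calls return the same mask. The stronger, disabled check would have been: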
test_assert(0 == memcmp(cpus, cpus_out, sizeof(*cpus))); */ VERIFY_GUARD(cpus_out); ALLOCATE_GUARD(cpus_out_2, 'y'); test_assert(0 == sched_getaffinity(0, sizeof(*cpus_out_2), cpus_out_2)); test_assert(0 == memcmp(cpus_out, cpus_out_2, sizeof(*cpus_out))); VERIFY_GUARD(cpus_out_2); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sched_setparam.c000066400000000000000000000013241265436462100170440ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(void) { struct sched_param* param; int scheduler; int min_priority; int max_priority; scheduler = sched_getscheduler(0); test_assert(scheduler >= 0); ALLOCATE_GUARD(param, 'x'); test_assert(0 == sched_getparam(0, param)); VERIFY_GUARD(param); min_priority = sched_get_priority_min(scheduler); test_assert(min_priority >= 0); max_priority = sched_get_priority_max(scheduler); test_assert(max_priority >= 0); test_assert(min_priority <= max_priority); param->sched_priority = min_priority; test_assert(0 == sched_setparam(0, param)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sched_yield.c000066400000000000000000000037511265436462100163440ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" enum { PING, PONG } last; pthread_once_t init_once = PTHREAD_ONCE_INIT; static void init_ping(void) { last = PONG; } static void init_pong(void) { last = PING; } static pthread_barrier_t bar; static int ping_ready; static int pong_ready; static void ping_pong(int which) { volatile int* self_ready = which == PING ? &ping_ready : &pong_ready; volatile int* other_ready = which == PING ? &pong_ready : &ping_ready; int i; /* Efficiently wait for the other thread to arrive. */ pthread_barrier_wait(&bar); /* Semi-busy-wait for both threads to be runnable. (One * thread exiting a barrier doesn't guarantee that the other * thread is immediately runnable.) */ *self_ready = 1; do { sched_yield(); } while (!*other_ready); /* Whichever thread reaches this loop first initializes the * other to be "last". */ pthread_once(&init_once, which == PING ? init_ping : init_pong); for (i = 0; i < 50; ++i) { /* Ensure that the other thread was the last to run * the loop body. */ which == PING ? test_assert("ping thread: " && PONG == last) : test_assert("pong thread: " && PING == last); last = which; /* Schedule the other thread run the next loop * iteration. * * NB: because the kernel scheduler is far more * complicated than rr's, this simplistic assumption * won't hold. If rr's scheduler grows to that level * of complexity, it's probably best to remove this * test. 
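For now, rr treats sched_yield() as a reliable hint to run the other ready thread, which is what keeps this ping-pong deterministic under the tracer.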
*/ sched_yield(); } } static void* pong_thread(void* unused) { ping_pong(PONG); return NULL; } int main(void) { cpu_set_t cpus; pthread_t t; CPU_ZERO(&cpus); CPU_SET(0, &cpus); sched_setaffinity(0, sizeof(cpus), &cpus); pthread_barrier_init(&bar, NULL, 2); test_assert(0 == pthread_create(&t, NULL, pong_thread, NULL)); ping_pong(PING); pthread_join(t, NULL); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sched_yield_to_lower_priority.c000066400000000000000000000010471265436462100222130ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static volatile int low_priority_thread_scheduled; static void* low_priority_thread(void* p) { setpriority(PRIO_PROCESS, 0, 4); low_priority_thread_scheduled = 1; return NULL; } int main(void) { pthread_t thread; pthread_create(&thread, NULL, low_priority_thread, NULL); test_assert(!low_priority_thread_scheduled); do { sched_yield(); } while (!low_priority_thread_scheduled); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/scm_rights.c000066400000000000000000000064361265436462100162350ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define MAGIC 0x1cd00d00 static void child(int sock, int fd_minus_one) { struct sockaddr addr; int fd; struct msghdr msg = { 0 }; union { int ints[2]; uint8_t bytes[sizeof(int[2])]; } mbuf; struct iovec iov; /* make cbuf bigger than necessary so we can test that the correct value is written back (the amount actually written by the kernel) */ uint8_t cbuf[CMSG_SPACE(sizeof(fd)) + 77]; const struct cmsghdr* cmsg; int zero = ~0; ssize_t nread; memset(&addr, 0x51, sizeof(addr)); msg.msg_name = &addr; msg.msg_namelen = sizeof(addr); mbuf.ints[0] = mbuf.ints[1] = ~MAGIC; iov.iov_base = mbuf.bytes; iov.iov_len = sizeof(mbuf); msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_control = cbuf; msg.msg_controllen = sizeof(cbuf); msg.msg_flags = -1; atomic_printf("c: receiving msg ...\n"); nread = recvmsg(sock, &msg, 0); atomic_printf("c: ... got %#x (%zd bytes), %zu control bytes\n", mbuf.ints[0], nread, msg.msg_controllen); test_assert(nread == sizeof(mbuf.ints[0])); test_assert(MAGIC == mbuf.ints[0]); test_assert(~MAGIC == mbuf.ints[1]); test_assert(msg.msg_controllen == CMSG_SPACE(sizeof(fd))); atomic_printf("c: ... and %d name bytes\n", msg.msg_namelen); test_assert(0 == msg.msg_namelen); atomic_printf("c: ... and flags %d\n", msg.msg_flags); test_assert(0 == msg.msg_flags); cmsg = CMSG_FIRSTHDR(&msg); test_assert(SOL_SOCKET == cmsg->cmsg_level && SCM_RIGHTS == cmsg->cmsg_type); memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd)); atomic_printf("c: ... and fd %d; should have received %d\n", fd, fd_minus_one + 1); test_assert(fd - 1 == fd_minus_one || fd - 2 == fd_minus_one); atomic_printf("c: reading from /dev/zero ...\n"); nread = read(fd, &zero, sizeof(zero)); atomic_printf("c: ... 
got %d (%zd bytes) %s\n", zero, nread, strerror(errno)); test_assert(0 == zero); exit(0); } int main(int argc, char* argv[]) { int sockfds[2]; int sock; pid_t c; int fd; struct msghdr msg = { 0 }; int mbuf = MAGIC; struct iovec iov; uint8_t cbuf[CMSG_SPACE(sizeof(fd))]; struct cmsghdr* cmsg; ssize_t nsent; int err; int status; test_assert(0 == socketpair(AF_LOCAL, SOCK_STREAM, 0, sockfds)); sock = sockfds[0]; fd = open("/dev/null", O_WRONLY); if (0 == (c = fork())) { child(sockfds[1], fd); test_assert("Not reached" && 0); } usleep(500000); fd = open("/dev/zero", O_RDONLY); iov.iov_base = &mbuf; iov.iov_len = sizeof(mbuf); msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_control = cbuf; msg.msg_controllen = sizeof(cbuf); cmsg = CMSG_FIRSTHDR(&msg); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd)); atomic_printf("P: sending %#x with fd %d ...\n", mbuf, fd); nsent = sendmsg(sock, &msg, 0); err = errno; atomic_printf("P: ... sent %zd bytes (%s/%d)\n", nsent, strerror(err), err); test_assert(0 < nsent); atomic_printf("P: waiting on child %d ...\n", c); test_assert(c == waitpid(c, &status, 0)); test_assert(WIFEXITED(status) && 0 == WEXITSTATUS(status)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/search.c000066400000000000000000000021701265436462100153270ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" char buf[1024] = { 99, 1, 2, 2, 3, 0 }; char* p; char* p_end; int* argc_ptr; static void breakpoint(void) {} int main(int argc, char* argv[]) { /* 'buf' could be mapped twice in our address space, once in our data segment and once in the text segment. Tests that search the whole address space for the contents of 'buf' don't want to find a spurious match in .text, so setup buf with its intended contents here. After this the copy in .text should continue to map the old value. 
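That makes the writable copy unique, so the gdb 'find' commands driven by search.py can count matches without tripping over a read-only duplicate.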
*/ buf[0] = 0; argc_ptr = &argc; p = (char*)mmap(NULL, PAGE_SIZE * 4, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); test_assert(p != MAP_FAILED); p_end = p + PAGE_SIZE * 4; memcpy(p + PAGE_SIZE, buf, sizeof(buf)); memcpy(p + PAGE_SIZE * 2, buf, sizeof(buf)); test_assert(0 == munmap(p, PAGE_SIZE)); test_assert(0 == munmap(p + PAGE_SIZE * 3, PAGE_SIZE)); test_assert(0 == mprotect(p + PAGE_SIZE, PAGE_SIZE, PROT_NONE)); breakpoint(); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/search.py000066400000000000000000000024211265436462100155340ustar00rootroot00000000000000from rrutil import * send_gdb('break breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('find buf,+1,(char)0') expect_gdb('') expect_gdb('1 pattern found') send_gdb('find buf,+1000,(char)99') expect_gdb('Pattern not found') send_gdb('find buf,+1000,(char)2') expect_gdb('') expect_gdb('3 patterns found') send_gdb('up'); send_gdb('find 0,-10L,&argc') expect_gdb('') send_gdb('down'); ok() rr-4.1.0/src/test/search.run000066400000000000000000000000471265436462100157120ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/seccomp.c000066400000000000000000000115161265436462100155170ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define SYS_rrcall_init_buffers 443 static int count_SIGSYS = 0; static int pipe_fds[2]; static void handler(int sig, siginfo_t* si, void* p) { ucontext_t* ctx = p; /* some versions of system headers don't define si_arch, si_call_addr or * si_syscall. Just skip tests on those systems. */ #ifdef __i386__ int syscallno = ctx->uc_mcontext.gregs[REG_EAX]; #elif defined(__x86_64__) int syscallno = ctx->uc_mcontext.gregs[REG_RAX]; #else #error define architecture here #endif #ifdef si_arch #ifdef __i386__ test_assert(si->si_arch == AUDIT_ARCH_I386); #elif defined(__x86_64__) test_assert(si->si_arch == AUDIT_ARCH_X86_64); #endif #endif test_assert(syscallno == SYS_geteuid || syscallno == SYS_open); test_assert(sig == SIGSYS); test_assert(si->si_signo == SIGSYS); test_assert(si->si_errno == 0); test_assert(si->si_code == 1 /* SYS_SECCOMP */); #ifdef si_call_addr #ifdef __i386__ test_assert((uintptr_t)si->si_call_addr == ctx->uc_mcontext.gregs[REG_EIP]); #elif defined(__x86_64__) test_assert((uintptr_t)si->si_call_addr == ctx->uc_mcontext.gregs[REG_RIP]); #else #error define architecture here #endif #endif #ifdef si_syscall test_assert(si->si_syscall == syscallno); #endif ++count_SIGSYS; } static void install_filter(void) { struct sock_filter filter[] = { /* Load system call number from 'seccomp_data' buffer into accumulator */ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)), /* Jump forward 1 instruction if system call number is not SYS_pipe */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_pipe, 0, 1), /* Error out with ESRCH */ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (ESRCH & SECCOMP_RET_DATA)), /* Jump forward 1 instruction if system call number is not SYS_geteuid */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_geteuid, 0, 1), /* Trigger SIGSYS */ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP), /* Jump forward 1 instruction if system call number is not SYS_open */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_open, 0, 1), /* Trigger SIGSYS */ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP), /* Jump forward 1 instruction if system call number is not SYS_rrcall_init_buffers */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_rrcall_init_buffers, 0, 
1), /* Trigger SIGSYS */ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP), /* Jump forward 1 instruction if system call number is not SYS_ioctl */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_ioctl, 0, 1), /* Trigger SIGSYS */ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP), /* Destination of system call number mismatch: allow other system calls */ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW) }; struct sock_fprog prog = { .len = (unsigned short)(sizeof(filter) / sizeof(filter[0])), .filter = filter, }; int ret; ret = syscall(RR_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog); if (ret == -1 && errno == ENOSYS) { ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); } test_assert(ret == 0); } static void* waiting_thread(void* p) { char buf; test_assert(1 == read(pipe_fds[0], &buf, 1)); /* Check this thread wasn't affected by the SET_SECCOMP */ test_assert(0 == prctl(PR_GET_SECCOMP)); return NULL; } static void* run_thread(void* p) { atomic_printf("EXIT-"); return NULL; } int main(int argc, char* argv[]) { struct sigaction sa; pthread_t thread; pthread_t w_thread; char ch; test_assert(0 == pipe(pipe_fds)); sa.sa_sigaction = handler; sigemptyset(&sa.sa_mask); sa.sa_flags = SA_SIGINFO; sigaction(SIGSYS, &sa, NULL); pthread_create(&w_thread, NULL, waiting_thread, NULL); /* Prepare syscallbuf patch path. Need to do this after pthread_create since when we have more than one thread we take a different syscall path... */ open("/dev/null", O_RDONLY); test_assert(0 == prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); test_assert(1 == prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0)); install_filter(); test_assert(2 == prctl(PR_GET_SECCOMP)); test_assert(1 == write(pipe_fds[1], "c", 1)); pthread_join(w_thread, NULL); test_assert(-1 == syscall(SYS_pipe, pipe_fds)); test_assert(ESRCH == errno); /* Spawning a thread will execute an rrcall_init_buffers syscall, which our filter tries to block but shouldn't be able to. */ pthread_create(&thread, NULL, run_thread, NULL); pthread_join(thread, NULL); /* Check that the ioctls used by syscallbuf aren't blocked */ test_assert(1 == write(pipe_fds[1], "c", 1)); test_assert(1 == read(pipe_fds[0], &ch, 1)); test_assert(1 == write(pipe_fds[1], "c", 1)); test_assert(1 == read(pipe_fds[0], &ch, 1)); syscall(SYS_geteuid); open("/dev/null", O_RDONLY); test_assert(count_SIGSYS == 2); atomic_puts("SUCCESS"); return 0; } rr-4.1.0/src/test/seccomp_null.c000066400000000000000000000006701265436462100165500ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int ret = syscall(RR_seccomp, SECCOMP_SET_MODE_FILTER, 0, NULL); if (ret == -1 && errno == ENOSYS) { ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL); } test_assert(ret == -1 && errno == EFAULT); test_assert(0 == prctl(PR_GET_SECCOMP)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/segfault.c000066400000000000000000000007671265436462100157060ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void sighandler(int sig) { atomic_printf("caught signal %d, exiting\n", sig); _exit(0); } static void breakpoint(void) { int break_here = 1; (void)break_here; } int main(int argc, char* argv[]) { signal(SIGSEGV, sighandler); breakpoint(); /* NO SYSCALLS BETWEEN HERE AND SEGFAULT BELOW: next event to * replay must be the signal. 
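With no syscall event to anchor on, replay has to deliver the SIGSEGV purely from its performance-counter interrupt at the recorded tick count.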
*/ *((volatile int*)0) = 0; return 0; } rr-4.1.0/src/test/segfault.run000066400000000000000000000001051265436462100162520ustar00rootroot00000000000000source `dirname $0`/util.sh compare_test 'caught signal 11, exiting' rr-4.1.0/src/test/self_sigint.c000066400000000000000000000004661265436462100163760ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { /* rr should ignore SIGINT */ kill(getppid(), SIGINT); atomic_puts("EXIT-SUCCESS"); kill(getpid(), SIGINT); test_assert(0 && "Shouldn't reach here"); return 0; } rr-4.1.0/src/test/sem.c000066400000000000000000000073151265436462100146540ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define COUNT 4 static int semid; static char* shmem; #ifdef _SEM_SEMUN_UNDEFINED union semun { int val; struct semid_ds* buf; unsigned short int* array; struct seminfo* __buf; }; #endif static int run_child(void) { int child2; int status; struct sembuf ops[2]; struct timespec ts = { 0, 20000000 }; struct timespec ts_short = { 0, 10000000 }; struct timespec ts_long = { 10000, 0 }; union semun un_arg; struct semid_ds* ds; struct seminfo* si; unsigned short* array; ops[0].sem_num = 0; ops[0].sem_op = 1; ops[0].sem_flg = SEM_UNDO; ops[1].sem_num = 1; ops[1].sem_op = 1; ops[1].sem_flg = SEM_UNDO; test_assert(0 == semop(semid, ops, 2)); *shmem = 0; ALLOCATE_GUARD(ds, 'd'); un_arg.buf = ds; test_assert(0 == semctl(semid, 0, IPC_STAT, un_arg)); VERIFY_GUARD(ds); test_assert(ds->sem_perm.mode == 0666); test_assert(ds->sem_nsems == COUNT); ds->sem_perm.mode = 0660; test_assert(0 == semctl(semid, 0, IPC_SET, un_arg)); ALLOCATE_GUARD(si, 'i'); un_arg.__buf = si; /* The following syscall should always return >= 1, but sometimes it returns 0. I don't know why. */ test_assert(0 <= semctl(semid, 0, IPC_INFO, un_arg)); VERIFY_GUARD(si); test_assert(si->semvmx > 0); test_assert(si->semusz < 100000); /* The following syscall should always return >= 1, but sometimes it returns 0. I don't know why. */ test_assert(0 <= semctl(semid, 0, SEM_INFO, un_arg)); VERIFY_GUARD(si); test_assert(si->semusz > 0); test_assert(si->semusz < 100000); array = allocate_guard(COUNT * sizeof(*array), 'a'); un_arg.array = array; test_assert(0 == semctl(semid, 0, GETALL, un_arg)); verify_guard(COUNT * sizeof(*array), array); test_assert(array[0] == 1); test_assert(array[1] == 1); test_assert(array[2] == 0); test_assert(array[3] == 0); array[2] = 2; test_assert(0 == semctl(semid, 0, SETALL, un_arg)); test_assert(0 == semctl(semid, 1, GETNCNT, NULL)); test_assert(getpid() == semctl(semid, 1, GETPID, NULL)); test_assert(2 == semctl(semid, 2, GETVAL, NULL)); test_assert(0 == semctl(semid, 0, GETZCNT, NULL)); un_arg.val = 0; test_assert(0 == semctl(semid, 2, SETVAL, un_arg)); if ((child2 = fork()) == 0) { ops[0].sem_op = -1; ops[1].sem_op = -1; /* The semtimedop timeout is irrelevant. We're just checking that the syscall works. 
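Semaphores 0 and 1 are already available at this point, so the decrement below succeeds immediately and the 10000-second timeout can never fire.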
*/ test_assert(0 == semtimedop(semid, ops, 2, &ts_long)); *shmem = 1; test_assert(0 == nanosleep(&ts, NULL)); *shmem = 0; ops[0].sem_op = 1; ops[1].sem_op = 1; test_assert(0 == semtimedop(semid, ops, 2, &ts)); return 0; } test_assert(0 == nanosleep(&ts_short, NULL)); ops[0].sem_op = -1; ops[1].sem_op = -1; test_assert(0 == semop(semid, ops, 2)); test_assert(*shmem == 0); ops[0].sem_op = 1; ops[1].sem_op = 1; test_assert(0 == semop(semid, ops, 2)); test_assert(child2 == waitpid(child2, &status, __WALL)); test_assert(0 == status); return 0; } int main(int argc, char* argv[]) { pid_t child; int status; shmem = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); test_assert(shmem != (void*)-1); semid = semget(IPC_PRIVATE, COUNT, 0666); test_assert(semid >= 0); if ((child = fork()) == 0) { return run_child(); } atomic_printf("child %d\n", child); test_assert(child == waitpid(child, &status, __WALL)); /* delete the sem before testing status, because we want to ensure the segment is deleted even if the test failed. */ test_assert(0 == semctl(semid, 0, IPC_RMID, NULL)); test_assert(status == 0); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sendfile.c000066400000000000000000000027201265436462100156540ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define TOKEN "ABC" #define TOKEN_SIZE sizeof(TOKEN) static const char token_file[] = "rr-sendfile-file.txt"; static const char token_file_out[] = "rr-sendfile-file-out.txt"; void verify_token(int fd) { ssize_t len; char buf[TOKEN_SIZE]; len = read(fd, buf, sizeof(buf)); if (len != TOKEN_SIZE || strcmp(buf, TOKEN)) { atomic_puts("Internal error: FAILED: sendfile wrote the wrong data"); exit(1); } atomic_puts("Got expected token " TOKEN); } int main(void) { int filefd; int filefd_out; loff_t off = 0; filefd = open(token_file, O_RDWR | O_CREAT | O_TRUNC, 0600); filefd_out = open(token_file, O_RDWR | O_CREAT | O_TRUNC, 0600); write(filefd, TOKEN, TOKEN_SIZE); sendfile64(filefd_out, filefd, &off, TOKEN_SIZE); atomic_printf( "sendfile %zu bytes from %d to %d; off changed from 0 to %" PRId64 "\n", TOKEN_SIZE, filefd, filefd_out, off); lseek(filefd_out, 0, SEEK_SET); verify_token(filefd_out); lseek(filefd, 0, SEEK_SET); sendfile64(filefd_out, filefd, NULL, TOKEN_SIZE); atomic_printf("sendfile %zu bytes from %d to %d\n", TOKEN_SIZE, filefd, filefd_out); lseek(filefd_out, 0, SEEK_SET); verify_token(filefd_out); /* The test driver will clean up after us if the test failed * before this. 
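On success we unlink the files ourselves so a passing run leaves nothing behind.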
*/ unlink(token_file); unlink(token_file_out); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/set_ptracer.c000066400000000000000000000003651265436462100164010ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { test_assert(0 == prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/set_tid_address.c000066400000000000000000000017441265436462100172300ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int v; static int* p; static int pipe_fds[2]; static void* run_thread(void* p) { test_assert(sys_gettid() == syscall(SYS_set_tid_address, &v)); return NULL; } static void* run_thread2(void* q) { test_assert(sys_gettid() == syscall(SYS_set_tid_address, p)); test_assert(1 == write(pipe_fds[1], "x", 1)); return NULL; } int main(int argc, char* argv[]) { pthread_t thread; char ch; v = 1; pthread_create(&thread, NULL, run_thread, NULL); test_assert(0 == syscall(SYS_futex, &v, FUTEX_WAIT, 1, NULL, NULL, 0)); test_assert(0 == v); p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); test_assert(p != MAP_FAILED); test_assert(0 == munmap(p, PAGE_SIZE)); test_assert(0 == pipe(pipe_fds)); pthread_create(&thread, NULL, run_thread2, NULL); test_assert(1 == read(pipe_fds[0], &ch, 1)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/setgid.c000066400000000000000000000004571265436462100153470ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { gid_t gid = getgid(); int err = setgid(gid); atomic_printf("setgid returned: %d\n", err); test_assert(0 == err); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/setgroups.c000066400000000000000000000017331265436462100161210ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #include int main(int argc, char* argv[]) { gid_t old_groups[1024]; gid_t new_groups[1024]; int i; int ret; int num_groups = getgroups(ALEN(old_groups), old_groups); test_assert(num_groups >= 0); /* make sure we have some new groups for setgroups() */ for (i = 0; i < num_groups; ++i) { new_groups[i] = old_groups[i] + 1; } if (num_groups == 0) { new_groups[0] = getegid(); num_groups = 1; } ret = setgroups(num_groups, new_groups); if (ret == -1) { test_assert(errno == EPERM); atomic_puts("Test did nothing because process does not have CAP_SETGID?"); atomic_puts("EXIT-SUCCESS"); return 0; } else { test_assert(getgroups(ALEN(old_groups), old_groups) == num_groups); for (i = 0; i < num_groups; ++i) { test_assert(new_groups[i] == old_groups[i]); } } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/setitimer.c000066400000000000000000000014161265436462100160710ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(void) { struct itimerval* v1; struct itimerval* v2; struct itimerval* v3; ALLOCATE_GUARD(v1, 0); v1->it_interval.tv_sec = 10000; v1->it_interval.tv_usec = 0; v1->it_value.tv_sec = 10000; v1->it_value.tv_usec = 0; test_assert(0 == setitimer(ITIMER_REAL, v1, NULL)); VERIFY_GUARD(v1); ALLOCATE_GUARD(v2, 1); test_assert(0 == setitimer(ITIMER_REAL, v1, v2)); test_assert(v2->it_interval.tv_sec == v1->it_interval.tv_sec); VERIFY_GUARD(v2); 
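/* Read the timer back with getitimer() into a third guarded buffer; it
   should report the same interval that setitimer() installed above. */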
ALLOCATE_GUARD(v3, 2); test_assert(0 == getitimer(ITIMER_REAL, v3)); test_assert(v3->it_interval.tv_sec == v1->it_interval.tv_sec); VERIFY_GUARD(v3); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/setsid.c000066400000000000000000000004511265436462100153550ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t newsid; newsid = setsid(); atomic_printf("New session ID: %d\n", newsid); if (newsid >= 0) { atomic_puts("EXIT-SUCCESS"); } return 0; } rr-4.1.0/src/test/setuid.c000066400000000000000000000011101265436462100153500ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #include #include int main(int argc, char* argv[]) { uid_t orig; uid_t new; int ret; orig = getuid(); test_assert(0 == setuid(orig)); new = orig + 1; ret = setuid(new); if (ret == -1) { test_assert(errno == EPERM); atomic_puts("Test did nothing because process does not have CAP_SETUID?"); atomic_puts("EXIT-SUCCESS"); return 0; } else { test_assert(getuid() == new); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/shared_persistent_file.c000066400000000000000000000010651265436462100206110ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) {} int main(int argc, char* argv[]) { int fd = open("/bin/sh", O_RDONLY); void* p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0); pid_t pid; int status; test_assert(fd >= 0); test_assert(p != MAP_FAILED); pid = fork(); breakpoint(); if (!pid) { return 77; } test_assert(pid == wait(&status)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 77); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/shared_persistent_file.py000066400000000000000000000004511265436462100210150ustar00rootroot00000000000000from rrutil import * import re send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('check') expect_gdb('= 1') send_gdb('c') expect_gdb('xited normally') send_gdb('restart 1') expect_gdb('stopped') send_gdb('c') expect_gdb('xited normally') ok() rr-4.1.0/src/test/shared_persistent_file.run000066400000000000000000000000471265436462100211720ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/shm.c000066400000000000000000000050171265436462100146540ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" /* Make SIZE not a multiple of the page size, to ensure we handle that case. But make sure it's even, since we divide it by two. 
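shmat() still maps whole pages, so the segment's final page is only partially backed; that boundary is the case this test wants to exercise.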
*/ #define SIZE ((16 * PAGE_SIZE) - 10) static int shmid; static void before_writing(void) {} static void after_writing(void) {} static int run_child(void) { int i; char* p; char* p2; pid_t child2; int status; struct shmid_ds* ds; struct shminfo* info; struct shm_info* info2; ALLOCATE_GUARD(ds, 'd'); test_assert(0 == shmctl(shmid, IPC_STAT, ds)); VERIFY_GUARD(ds); test_assert(ds->shm_segsz == SIZE); test_assert(ds->shm_cpid == getppid()); test_assert(ds->shm_nattch == 0); ds->shm_perm.mode = 0660; test_assert(0 == shmctl(shmid, IPC_SET, ds)); ALLOCATE_GUARD(info, 'i'); test_assert(0 <= shmctl(shmid, IPC_INFO, (struct shmid_ds*)info)); VERIFY_GUARD(info); test_assert(info->shmmin == 1); ALLOCATE_GUARD(info2, 'j'); test_assert(0 <= shmctl(shmid, SHM_INFO, (struct shmid_ds*)info2)); VERIFY_GUARD(info2); test_assert(info2->used_ids > 0); test_assert(info2->used_ids < 1000000); p = shmat(shmid, NULL, 0); test_assert(p != (char*)-1); before_writing(); for (i = 0; i < SIZE; ++i) { test_assert(p[i] == 0); } memset(p, 'r', SIZE / 2); after_writing(); p2 = shmat(shmid, NULL, 0); test_assert(p2 != (char*)-1); memset(p + SIZE / 2, 'r', SIZE / 2); test_assert(0 == shmdt(p)); test_assert(0 == shmdt(p2)); test_assert(p == mmap(p, SIZE, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); test_assert(p[0] == 0); p = shmat(shmid, p, SHM_REMAP); test_assert(p != (char*)-1); for (i = 0; i < SIZE; ++i) { test_assert(p[i] == 'r'); } if ((child2 = fork()) == 0) { memset(p, 's', SIZE); return 0; } test_assert(child2 == waitpid(child2, &status, __WALL)); test_assert(0 == status); for (i = 0; i < SIZE; ++i) { test_assert(p[i] == 's'); } return 0; } int main(int argc, char* argv[]) { pid_t child; int status; shmid = shmget(IPC_PRIVATE, SIZE, 0666); test_assert(shmid >= 0); if ((child = fork()) == 0) { return run_child(); } atomic_printf("child %d\n", child); test_assert(child == waitpid(child, &status, __WALL)); /* delete the shm before testing status, because we want to ensure the segment is deleted even if the test failed. 
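System V segments are not reclaimed automatically on process exit, so skipping IPC_RMID on a failing run would leak the segment system-wide.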
*/ test_assert(0 == shmctl(shmid, IPC_RMID, NULL)); test_assert(status == 0); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/shm_checkpoint.py000066400000000000000000000006251265436462100172710ustar00rootroot00000000000000from rrutil import * send_gdb('b before_writing') expect_gdb('Breakpoint 1') send_gdb('b after_writing') expect_gdb('Breakpoint 2') send_gdb('c'); expect_gdb('Breakpoint 1, before_writing') send_gdb('checkpoint'); expect_gdb('= 1'); send_gdb('c'); expect_gdb('Breakpoint 2, after_writing') send_gdb('restart 1'); expect_gdb('stopped') send_gdb('c'); expect_gdb('Breakpoint 2, after_writing') ok() rr-4.1.0/src/test/shm_checkpoint.run000066400000000000000000000002171265436462100174420ustar00rootroot00000000000000source `dirname $0`/util.sh record shm$bitness TARGET_PID=$(grep 'child ' record.out | awk '{print $2}') debug shm_checkpoint "-f $TARGET_PID" rr-4.1.0/src/test/sigaction_old.c000066400000000000000000000013711265436462100167020ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void handler1(int sig, siginfo_t* si, void* p) {} static void handler2(int sig, siginfo_t* si, void* p) {} static void handler3(int sig, siginfo_t* si, void* p) {} int main(int argc, char* argv[]) { struct sigaction sa; struct sigaction old_sa; sa.sa_sigaction = handler1; sigemptyset(&sa.sa_mask); sa.sa_flags = SA_SIGINFO; sigaction(SIGUSR1, &sa, NULL); sa.sa_sigaction = handler2; old_sa.sa_sigaction = handler3; sigaction(SIGUSR1, &sa, &old_sa); test_assert(old_sa.sa_sigaction == handler1); sigaction(SIGUSR1, NULL, &old_sa); test_assert(old_sa.sa_sigaction == handler2); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sigaltstack.c000066400000000000000000000011631265436462100163740ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static char buf[128 * 1024]; int main(int argc, char* argv[]) { stack_t* ss; stack_t* oss; ALLOCATE_GUARD(ss, 'x'); ss->ss_sp = buf; ss->ss_flags = 0; ss->ss_size = sizeof(buf); test_assert(0 == sigaltstack(ss, NULL)); VERIFY_GUARD(ss); ALLOCATE_GUARD(oss, 'y'); test_assert(0 == sigaltstack(ss, oss)); test_assert(oss->ss_sp == buf); test_assert(oss->ss_flags == 0); test_assert(oss->ss_size == sizeof(buf)); VERIFY_GUARD(ss); VERIFY_GUARD(oss); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sigchld_interrupt_signal.c000066400000000000000000000011261265436462100211500ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t c; int dummy = 0, i; int status; atomic_puts("forking child"); if (0 == (c = fork())) { usleep(10000); atomic_puts("child exiting"); exit(0); } /* NO SYSCALLS AFTER HERE! (Up to the test_asserts.) 
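The busy loop guarantees the asynchronous SIGCHLD arrives in the middle of pure userspace computation, which replay can only reproduce by interrupting at the recorded tick count.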
*/ for (i = 1; i < (1 << 28); ++i) { dummy += (dummy + i) % 9735; } test_assert(c == waitpid(c, &status, 0)); test_assert(WIFEXITED(status) && 0 == WEXITSTATUS(status)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sighandler_fork.c000066400000000000000000000012151265436462100172220ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void sighandler(int sig) { atomic_printf("caught signal %d, exiting\n", sig); } int main(int argc, char* argv[]) { pid_t c; signal(SIGCHLD, sighandler); atomic_puts("forking child"); if (0 == (c = fork())) { // Child usleep(10000); atomic_puts("forking grandchild"); if (0 == (c = fork())) { // Grandchild usleep(10000); exit(0); } waitpid(c, NULL, 0); return 0; } // Because why not. signal(SIGCHLD, NULL); waitpid(c, NULL, 0); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sigill.c000066400000000000000000000006571265436462100153550ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void sighandler(int sig) { atomic_printf("caught signal %d, exiting\n", sig); atomic_puts("EXIT-SUCCESS"); _exit(0); } int main(int argc, char* argv[]) { signal(SIGILL, sighandler); atomic_puts("running undefined instruction ..."); __asm__("ud2"); test_assert("should have terminated!" && 0); return 0; } rr-4.1.0/src/test/signal_checkpoint.py000066400000000000000000000005131265436462100177530ustar00rootroot00000000000000from rrutil import * send_gdb('b sighandler') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Program received signal SIGILL') expect_gdb('ud2') send_gdb('checkpoint') expect_gdb('= 1') send_gdb('c') expect_gdb('Breakpoint 1, sighandler') send_gdb("restart 1"); send_gdb('c') expect_gdb('Breakpoint 1, sighandler') ok() rr-4.1.0/src/test/signal_checkpoint.run000066400000000000000000000001121265436462100201220ustar00rootroot00000000000000source `dirname $0`/util.sh record sigill$bitness debug signal_checkpoint rr-4.1.0/src/test/signal_numbers.c000066400000000000000000000014421265436462100170730ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int sig; for (sig = 1; sig <= 64; ++sig) { struct sigaction sa; /* Skip signals that are fatal and can't be ignored, and skip signals that rr uses for itself. 
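(rr has historically reserved SIGSTKFLT or SIGPWR as its internal desched signal for the syscall buffer, so the tracee must leave those alone.)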
*/ if (sig == SIGKILL || sig == SIGSTOP || sig == SIGSTKFLT || sig == SIGPWR) { continue; } sa.sa_handler = SIG_IGN; sa.sa_flags = 0; sa.sa_restorer = NULL; sigemptyset(&sa.sa_mask); /* Avoid libc wrappers since glibc won't let us send certain signals that it reserves for itself */ test_assert(0 == syscall(SYS_rt_sigaction, sig, &sa, NULL, 8)); test_assert(0 == kill(getpid(), sig)); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/signal_numbers.py000066400000000000000000000014441265436462100173030ustar00rootroot00000000000000from rrutil import * gdb_signals = [ '', 'SIGHUP', 'SIGINT', 'SIGQUIT', 'SIGILL', 'SIGTRAP', 'SIGABRT', 'SIGBUS', 'SIGFPE', '#SIGKILL', 'SIGUSR1', 'SIGSEGV', 'SIGUSR2', 'SIGPIPE', 'SIGALRM', 'SIGTERM', '#SIGSTKFLT', 'SIGCHLD', 'SIGCONT', '#SIGSTOP', 'SIGTSTP', 'SIGTTIN', 'SIGTTOU', 'SIGURG', 'SIGXCPU', 'SIGXFSZ', 'SIGVTALRM', 'SIGPROF', 'SIGWINCH', 'SIGIO', '#SIGPWR', 'SIGSYS'] for sig in xrange(32,65): gdb_signals.append('SIG%d'%sig) for sig in xrange(1,65): gdb_sig = gdb_signals[sig] if not gdb_sig.startswith('#'): send_gdb('handle %s stop'%gdb_sig) if gdb_sig == 'SIGINT' or gdb_sig == 'SIGTRAP': send_gdb('y') send_gdb('c') expect_gdb('received signal %s'%gdb_sig) ok() rr-4.1.0/src/test/signal_numbers.run000066400000000000000000000000471265436462100174550ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/signal_stop.py000066400000000000000000000003241265436462100166110ustar00rootroot00000000000000from rrutil import * send_gdb('b sighandler') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Program received signal SIGILL') expect_gdb('ud2') send_gdb('c') expect_gdb('Breakpoint 1, sighandler') ok() rr-4.1.0/src/test/signal_stop.run000066400000000000000000000001041265436462100167610ustar00rootroot00000000000000source `dirname $0`/util.sh record sigill$bitness debug signal_stop rr-4.1.0/src/test/signalfd.c000066400000000000000000000011301265436462100156440ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(void) { sigset_t mask; int fd; struct signalfd_siginfo si; test_assert(0 == sigemptyset(&mask)); test_assert(0 == sigaddset(&mask, SIGURG)); fd = signalfd(-1, &mask, 0); test_assert(fd >= 0); test_assert(0 == sigprocmask(SIG_BLOCK, &mask, NULL)); test_assert(0 == kill(getpid(), SIGURG)); test_assert(sizeof(si) == read(fd, &si, sizeof(si))); test_assert(si.ssi_signo == SIGURG); test_assert(si.ssi_pid == getpid()); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sigprocmask.c000066400000000000000000000015141265436462100164050ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int signals_unblocked; static void handle_usr1(int sig) { atomic_puts("Caught usr1"); test_assert(signals_unblocked); } int main(int argc, char* argv[]) { sigset_t mask, oldmask; int i, dummy = 0; signal(SIGUSR1, handle_usr1); sigemptyset(&mask); sigaddset(&mask, SIGUSR1); /* The libc function invokes rt_sigprocmask. */ sigprocmask(SIG_BLOCK, &mask, &oldmask); raise(SIGUSR1); for (i = 0; i < 1 << 25; ++i) { dummy += (dummy + i) % 9735; } signals_unblocked = 1; /* Some systems only have rt_sigprocmask. 
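x86-64 dropped the legacy sigprocmask syscall entirely, so pick whichever raw syscall this architecture provides.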
*/ #if defined(SYS_sigprocmask) syscall(SYS_sigprocmask, SIG_SETMASK, &oldmask, NULL); #else sigprocmask(SIG_SETMASK, &oldmask, NULL); #endif atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sigprocmask_in_syscallbuf_sighandler.c000066400000000000000000000020051265436462100235160ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int pipe_fds[2]; static void handle_signal(int sig) { sigset_t mask; atomic_puts("Caught SIGALRM"); sigemptyset(&mask); sigaddset(&mask, SIGUSR1); test_assert(0 == sigprocmask(SIG_BLOCK, &mask, NULL)); /* Syscallbuf should be still locked here. If it's not this could corrupt syscallbuf state. */ test_assert(2 == write(pipe_fds[1], "xx", 2)); } int main(int argc, char* argv[]) { struct sigaction sact; char buf; test_assert(0 == pipe(pipe_fds)); sigemptyset(&sact.sa_mask); sact.sa_flags = SA_RESTART; sact.sa_handler = handle_signal; test_assert(0 == sigaction(SIGALRM, &sact, NULL)); test_assert(0 == alarm(1)); /* If the syscallbuf state is corrupted by the signal handler we'll probably crash out here. */ test_assert(1 == read(pipe_fds[0], &buf, 1)); /* Or here */ test_assert(1 == read(pipe_fds[0], &buf, 1)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sigprocmask_syscallbuf.c000066400000000000000000000013471265436462100206400ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int pipe_fds[2]; int main(int argc, char* argv[]) { char buf; pid_t pid; int status; sigset_t mask; test_assert(0 == pipe(pipe_fds)); sigemptyset(&mask); sigfillset(&mask); test_assert(0 == sigprocmask(SIG_BLOCK, &mask, NULL)); /* Check that even when all signals are supposedly blocked, syscallbuf still works */ pid = fork(); if (!pid) { test_assert(1 == write(pipe_fds[1], "y", 1)); return 77; } test_assert(1 == read(pipe_fds[0], &buf, 1)); test_assert(pid == wait(&status)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 77); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sigqueueinfo.c000066400000000000000000000025721265436462100165730ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void queue_siginfo(int sig, int val) { siginfo_t si = { 0 }; si.si_code = SI_QUEUE; si.si_pid = getpid(); si.si_uid = geteuid(); si.si_value.sival_int = val; syscall(SYS_rt_sigqueueinfo, getpid(), sig, &si); } static void queue_siginfo_tg(int sig, int val) { siginfo_t si = { 0 }; si.si_code = SI_QUEUE; si.si_pid = getpid(); si.si_uid = geteuid(); si.si_value.sival_int = val; syscall(SYS_rt_tgsigqueueinfo, getpid(), getpid(), sig, &si); } static int usr1_val; static int usr2_val; static void handle_signal(int sig, siginfo_t* si, void* ctx) { int val = si->si_value.sival_int; if (SIGUSR1 == sig) { usr1_val = val; } else if (SIGUSR2 == sig) { usr2_val = val; } else { assert("Unexpected signal" && 0); } } int main(int argc, char* argv[]) { struct sigaction sa; sa.sa_sigaction = handle_signal; sigemptyset(&sa.sa_mask); sa.sa_flags = SA_SIGINFO; sigaction(SIGUSR1, &sa, NULL); sigaction(SIGUSR2, &sa, NULL); queue_siginfo(SIGUSR1, -42); test_assert(-42 == usr1_val); queue_siginfo(SIGUSR2, 12345); test_assert(12345 == usr2_val); queue_siginfo_tg(SIGUSR1, -43); test_assert(-43 == usr1_val); queue_siginfo_tg(SIGUSR2, 123456); test_assert(123456 == usr2_val); atomic_puts("EXIT-SUCCESS"); return 0; } 
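/* Using raw SYS_rt_sigqueueinfo / SYS_rt_tgsigqueueinfo rather than glibc's
   sigqueue() lets the test fill in si_code, si_pid and si_uid itself; those
   are exactly the siginfo fields rr must carry through record and replay. */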
rr-4.1.0/src/test/sigreturn.c000066400000000000000000000132101265436462100161010ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" /* Test to ensure that sigreturn restores all necessary registers */ struct reg_ops { void (*set)(const void* p); void (*get)(void* p); }; static struct reg_ops xmm_ops[8]; static struct reg_ops st_ops[8]; /* The assignments to uint32_t* in the inline assembly statements below are because if we used *p in the asm constraints, GCC would think we were dereferencing a void pointer (!). */ #define DEFINE_XMM_HELPERS(i) \ void set_xmm##i(const void* p) { \ const uint32_t* x = p; \ asm("movaps %[ptr], %%xmm" #i : /* no outputs */ : [ptr] "m"(*x)); \ } \ void get_xmm##i(void* p) { \ uint32_t* x = p; \ asm("movaps %%xmm" #i ", %[ptr]" : [ptr] "=m"(*x) : /* no inputs */); \ } DEFINE_XMM_HELPERS(0) DEFINE_XMM_HELPERS(1) DEFINE_XMM_HELPERS(2) DEFINE_XMM_HELPERS(3) DEFINE_XMM_HELPERS(4) DEFINE_XMM_HELPERS(5) DEFINE_XMM_HELPERS(6) DEFINE_XMM_HELPERS(7) void set_st7(const void* p) { const uint32_t* x = p; asm("\tfinit\n" "\tfldt %[ptr]\n" "\tfst %%st(7)\n" : /* no outputs */ : [ptr] "m"(*x)); } void get_st7(void* p) { uint32_t* x = p; asm("\tfdecstp\n" "\tfstpt %[ptr]\n" : [ptr] "=m"(*x) : /* no inputs */); } #define DEFINE_ST_HELPERS(i) \ void set_st##i(const void* p) { \ const uint32_t* x = p; \ asm("\tfinit\n" \ "\tfldt %[ptr]\n" \ "\tfst %%st(" #i ")\n" \ : /* no outputs */ \ : [ptr] "m"(*x)); \ } \ void get_st##i(void* p) { \ uint32_t* x = p; \ asm("\tfld %%st(" #i ")\n" \ "\tfstpt %[ptr]\n" \ : [ptr] "=m"(*x) \ : /* no inputs */); \ } DEFINE_ST_HELPERS(0) DEFINE_ST_HELPERS(1) DEFINE_ST_HELPERS(2) DEFINE_ST_HELPERS(3) DEFINE_ST_HELPERS(4) DEFINE_ST_HELPERS(5) DEFINE_ST_HELPERS(6) static void init(void) { #define INIT(i) \ xmm_ops[i].set = set_xmm##i; \ xmm_ops[i].get = get_xmm##i; \ st_ops[i].set = set_st##i; \ st_ops[i].get = get_st##i; INIT(0) INIT(1) INIT(2) INIT(3) INIT(4) INIT(5) INIT(6) INIT(7) } #define GOOD 0x12345678 #define BAD 0xFEDCBA98 #define XMM_SIZE 16 #define XMM_ALIGNMENT __attribute__((aligned(XMM_SIZE))) static const int xmm_good[XMM_SIZE / sizeof(int)] XMM_ALIGNMENT = { GOOD, GOOD + 1, GOOD + 2, GOOD + 3 }; static const int xmm_bad[XMM_SIZE / sizeof(int)] XMM_ALIGNMENT = { BAD, BAD + 1, BAD + 2, BAD + 3 }; #define ST_SIZE 10 static long double st_good = 12345678.90; static long double st_bad = -1.23456789; static int regnum; static void handle_usr1_xmm(int sig) { int xmm[XMM_SIZE / sizeof(int)] XMM_ALIGNMENT; xmm_ops[regnum].get(xmm); /* Print incoming xmm value to ensure any modifications made while entering the signal handler are replayed correctly */ atomic_printf("xmm %d incoming: %x %x %x %x\n", regnum, xmm[0], xmm[1], xmm[2], xmm[3]); /* Try to corrupt register, to see if it gets restored */ xmm_ops[regnum].set(xmm_bad); } static void handle_usr1_st(int sig) { char st[ST_SIZE]; st_ops[regnum].get(st); /* Print incoming st value to ensure any modifications made while entering the signal handler are replayed correctly */ atomic_printf("st %d incoming: %x %x %x\n", regnum, *((int*)(st)), *((int*)(st + 4)), *((short*)(st + 8)) & 0xffff); /* Try to corrupt register, to see if it gets restored */ st_ops[regnum].set(&st_bad); } int main(int argc, char* argv[]) { init(); signal(SIGUSR1, handle_usr1_xmm); for (regnum = 0; regnum < 8; ++regnum) { int xmm[XMM_SIZE / sizeof(int)] XMM_ALIGNMENT; xmm_ops[regnum].set(xmm_good); raise(SIGUSR1); memcpy(xmm, xmm_bad, sizeof(xmm)); 
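/* Clobber the stack copy before reading the register back: if the value
   still matches xmm_good, it can only have come from the register state
   that sigreturn restored. */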
xmm_ops[regnum].get(xmm); test_assert("XMM register should have been preserved" && memcmp(xmm, xmm_good, sizeof(xmm)) == 0); } signal(SIGUSR1, handle_usr1_st); for (regnum = 0; regnum < 8; ++regnum) { char st[ST_SIZE]; st_ops[regnum].set(&st_good); raise(SIGUSR1); memcpy(st, &st_bad, sizeof(st)); st_ops[regnum].get(st); test_assert("ST register should have been preserved" && memcmp(st, &st_good, sizeof(st)) == 0); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sigreturn_reg.c000066400000000000000000000017541265436462100167500ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int caught_sig = 0; long long v = 99; void catcher(int signum, siginfo_t* siginfo_ptr, void* ucontext_ptr) { #ifdef __x86_64__ ucontext_t* ctx = (ucontext_t*)ucontext_ptr; test_assert(ctx->uc_mcontext.gregs[REG_RCX] == 0); test_assert(ctx->uc_mcontext.gregs[REG_RDI] == 0); ctx->uc_mcontext.gregs[REG_RDI] = (long long)&v; #endif caught_sig = signum; } int main(int argc, char** argv) { struct sigaction sact; long long ax = v; long long cx = 0; sigemptyset(&sact.sa_mask); sact.sa_flags = SA_SIGINFO; sact.sa_sigaction = catcher; sigaction(SIGSEGV, &sact, NULL); #ifdef __x86_64__ ax = 0; __asm__("\txor %%rdi,%%rdi\n" "\txor %%rcx,%%rcx\n" "\tmov (%%rdi),%%rax\n" : "=c"(cx), "=a"(ax) :); test_assert(caught_sig == SIGSEGV); #endif test_assert(cx == 0); test_assert(ax == v); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sigrt.c000066400000000000000000000027431265436462100152200ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) { int break_here = 1; (void)break_here; } static int num_signals_caught; static void handle_sigrt(int sig) { atomic_printf("Caught signal %d\n", sig); ++num_signals_caught; } static void my_raise(int sig) { /* Don't call raise() directly, since that can go through our syscall hooks which mess up gdb's reverse-finish slightly. 
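Issuing the syscall instruction by hand, followed by an instruction sequence the syscall-buffering patcher refuses to rewrite, keeps this raise unbuffered.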
*/ #ifdef __i386__ int tid = getpid(); /* Use a special instruction after the syscall to make sure we don't patch it */ __asm__ __volatile__("xchg %%ebx,%%edi\n\t" "int $0x80\n\t" "xchg %%ebx,%%edi\n\t" ::"a"(SYS_tgkill), "c"(tid), "d"(sig), "D"(tid)); #elif defined(__x86_64__) int tid = getpid(); /* Use a special instruction after the syscall to make sure we don't patch it */ __asm__ __volatile__("syscall\n\t" "xchg %%rdx,%%rdx\n\t" ::"a"(SYS_tgkill), "D"(tid), "S"(tid), "d"(sig)); #else raise(sig); #endif } int main(int argc, char* argv[]) { int i; for (i = SIGRTMIN; i <= SIGRTMAX; ++i) { breakpoint(); signal(i, handle_sigrt); my_raise(i); } atomic_printf("caught %d signals; expected %d\n", num_signals_caught, 1 + SIGRTMAX - SIGRTMIN); test_assert(1 + SIGRTMAX - SIGRTMIN == num_signals_caught); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sigstop.c000066400000000000000000000010231265436462100155460ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t child; int status = -1; struct timespec ts = { 0, 50000000 }; if (0 == (child = fork())) { kill(getpid(), SIGSTOP); assert(0 && "child should not resume"); return 77; } nanosleep(&ts, NULL); test_assert(0 == waitpid(child, &status, WNOHANG)); test_assert(-1 == status); test_assert(0 == kill(child, SIGKILL)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sigstop2.c000066400000000000000000000010251265436462100156320ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t child; int status = -1; struct timespec ts = { 0, 50000000 }; if (0 == (child = fork())) { nanosleep(&ts, NULL); kill(getpid(), SIGSTOP); assert(0 && "child should not resume"); return 77; } test_assert(0 == waitpid(child, &status, WNOHANG)); test_assert(-1 == status); test_assert(0 == kill(child, SIGKILL)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sigsuspend.c000066400000000000000000000032251265436462100162500ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void* thread(void* p) { sigset_t mask; sigemptyset(&mask); sigaddset(&mask, SIGUSR1); sigaddset(&mask, SIGUSR2); test_assert(0 == pthread_sigmask(SIG_BLOCK, &mask, NULL)); test_assert(0 == kill(getpid(), SIGUSR1)); test_assert(0 == kill(getpid(), SIGUSR2)); return NULL; } static int usr1_hit; static int usr2_hit; static void handle_signal(int sig, siginfo_t* si, void* ctx) { if (SIGUSR1 == sig) { ++usr1_hit; } else if (SIGUSR2 == sig) { ++usr2_hit; } else { assert("Unexpected signal" && 0); } } int main(int argc, char* argv[]) { struct sigaction sa; pthread_t t; sigset_t mask; int ret; struct timespec ts; siginfo_t si; sa.sa_sigaction = handle_signal; sigemptyset(&sa.sa_mask); sa.sa_flags = SA_SIGINFO; sigaction(SIGUSR1, &sa, NULL); sigaction(SIGUSR2, &sa, NULL); sigemptyset(&mask); sigaddset(&mask, SIGUSR1); sigaddset(&mask, SIGUSR2); test_assert(0 == pthread_sigmask(SIG_BLOCK, &mask, NULL)); pthread_create(&t, NULL, thread, NULL); sigemptyset(&mask); sigaddset(&mask, SIGUSR1); sigsuspend(&mask); test_assert(usr1_hit == 0); test_assert(usr2_hit == 1); test_assert(0 == sigpending(&mask)); test_assert(1 == sigismember(&mask, SIGUSR1)); test_assert(0 == sigismember(&mask, SIGUSR2)); ts.tv_sec = 5; ts.tv_nsec = 0; ret = sigtimedwait(&mask, &si, &ts); atomic_printf("Signal %d 
became pending\n", ret); test_assert(SIGUSR1 == ret); test_assert(si.si_signo == SIGUSR1); test_assert(si.si_code == SI_USER); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sigtrap.c000066400000000000000000000005101265436462100155270ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void handle_sigtrap(int sig) { atomic_puts("EXIT-SUCCESS"); _exit(0); } int main(int argc, char* argv[]) { signal(SIGTRAP, handle_sigtrap); atomic_puts("raising SIGTRAP ..."); raise(SIGTRAP); return 0; } rr-4.1.0/src/test/simple.c000066400000000000000000000002431265436462100153520ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(void) { atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/simple_script.run000066400000000000000000000001351265436462100173200ustar00rootroot00000000000000source `dirname $0`/util.sh just_record $TESTDIR/simple_script.sh replay check EXIT-SUCCESS rr-4.1.0/src/test/simple_script.sh000077500000000000000000000000351265436462100171300ustar00rootroot00000000000000#!/bin/sh echo EXIT-SUCCESS rr-4.1.0/src/test/simple_script_debug.py000066400000000000000000000002011265436462100203040ustar00rootroot00000000000000from rrutil import * send_gdb('b __libc_start_main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') ok() rr-4.1.0/src/test/simple_script_debug.run000066400000000000000000000001741265436462100204710ustar00rootroot00000000000000source `dirname $0`/util.sh just_record $TESTDIR/simple_script.sh debug simple_script_debug "--onprocess simple_script.sh" rr-4.1.0/src/test/simple_winch.py000066400000000000000000000005371265436462100167560ustar00rootroot00000000000000from rrutil import * # Signal all processes in the process group send_gdb('!kill -WINCH 0') send_gdb('c') expect_rr('EXIT-SUCCESS') expect_gdb('exited normally') send_gdb('break main') expect_gdb('Breakpoint 1') send_gdb('run') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('!kill -WINCH 0') send_gdb('reverse-cont') expect_gdb('stopped') ok() rr-4.1.0/src/test/simple_winch.run000066400000000000000000000001051265436462100171210ustar00rootroot00000000000000source `dirname $0`/util.sh record simple$bitness debug simple_winch rr-4.1.0/src/test/sioc.c000066400000000000000000000121341265436462100150200ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" const char* sockaddr_name(const struct sockaddr* addr) { const struct sockaddr_in* sin = (const struct sockaddr_in*)addr; static char str[PATH_MAX]; /* FIXME: add INET6 support (original author didn't * have ipv6 iface available to test). */ test_assert(AF_INET == addr->sa_family); return inet_ntop(AF_INET, (void*)&sin->sin_addr, str, sizeof(str)); } const char* sockaddr_hw_name(const struct sockaddr* addr) { static char str[PATH_MAX]; const unsigned char* data = (const unsigned char*)addr->sa_data; test_assert(AF_LOCAL == addr->sa_family); sprintf(str, "%02x:%02x:%02x:%02x:%02x:%02x", data[0], data[1], data[2], data[3], data[4], data[5]); return str; } /** * Fetch and print the ifconfig for this machine. Fill in * |req.ifr_name| with the first non-loopback interface name found. 
*/ static void get_ifconfig(int sockfd, struct ifreq* req) { struct ifreq ifaces[100]; struct ifconf ifconf; int ret; ssize_t num_ifaces; int i; int set_req_iface = 0; ifconf.ifc_len = sizeof(ifaces); ifconf.ifc_req = ifaces; ret = ioctl(sockfd, SIOCGIFCONF, &ifconf); num_ifaces = ifconf.ifc_len / sizeof(ifaces[0]); atomic_printf("SIOCGIFCONF(ret %d): %zd ifaces (%d bytes of ifreq)\n", ret, num_ifaces, ifconf.ifc_len); test_assert(0 == ret); test_assert(0 == (ifconf.ifc_len % sizeof(ifaces[0]))); for (i = 0; i < num_ifaces; ++i) { const struct ifreq* ifc = &ifconf.ifc_req[i]; atomic_printf(" iface %d: name:%s addr:%s\n", i, ifc->ifr_name, sockaddr_name(&ifc->ifr_addr)); if (!set_req_iface && strcmp("lo", ifc->ifr_name)) { strcpy(req->ifr_name, ifc->ifr_name); set_req_iface = 1; } } if (!set_req_iface) { atomic_puts("Only loopback interface found\n"); atomic_puts("EXIT-SUCCESS"); exit(0); } } int main(int argc, char* argv[]) { int sockfd = socket(AF_INET, SOCK_DGRAM, 0); struct ifreq req; char name[PATH_MAX]; int index; struct ethtool_cmd etc; int err, ret; struct iwreq wreq; get_ifconfig(sockfd, &req); strcpy(name, req.ifr_name); req.ifr_ifindex = -1; strcpy(req.ifr_name, name); ret = ioctl(sockfd, SIOCGIFINDEX, &req); atomic_printf("SIOCGIFINDEX(ret:%d): %s index is %d\n", ret, req.ifr_name, req.ifr_ifindex); test_assert(0 == ret); index = req.ifr_ifindex; memset(&req.ifr_name, 0x5a, sizeof(req.ifr_name)); req.ifr_ifindex = index; ret = ioctl(sockfd, SIOCGIFNAME, &req); atomic_printf("SIOCGIFNAME(ret:%d): index %d(%s) name is %s\n", ret, index, name, req.ifr_name); test_assert(0 == ret); test_assert(!strcmp(name, req.ifr_name)); memset(&req.ifr_addr, 0x5a, sizeof(req.ifr_addr)); ret = ioctl(sockfd, SIOCGIFADDR, &req); atomic_printf("SIOCGIFADDR(ret:%d): %s addr is", ret, req.ifr_name); atomic_printf(" %s\n", sockaddr_name(&req.ifr_addr)); test_assert(0 == ret); memset(&req.ifr_addr, 0x5a, sizeof(req.ifr_addr)); ret = ioctl(sockfd, SIOCGIFHWADDR, &req); atomic_printf("SIOCGIFHWADDR(ret:%d): %s addr is", ret, req.ifr_name); atomic_printf(" %s\n", sockaddr_hw_name(&req.ifr_addr)); test_assert(0 == ret); memset(&req.ifr_flags, 0x5a, sizeof(req.ifr_flags)); ret = ioctl(sockfd, SIOCGIFFLAGS, &req); atomic_printf("SIOCGIFFLAGS(ret:%d): %s flags are", ret, req.ifr_name); test_assert(0 == ret); atomic_printf(" %#x\n", req.ifr_flags); memset(&req.ifr_flags, 0x5a, sizeof(req.ifr_mtu)); ret = ioctl(sockfd, SIOCGIFMTU, &req); atomic_printf("SIOCGIFMTU(ret:%d): %s MTU is", ret, req.ifr_name); test_assert(0 == ret); atomic_printf(" %d\n", req.ifr_mtu); memset(&etc, 0, sizeof(etc)); etc.cmd = ETHTOOL_GSET; req.ifr_data = (char*)&etc; ret = ioctl(sockfd, SIOCETHTOOL, &req); err = errno; atomic_printf("SIOCETHTOOL(ret:%d): %s ethtool data:\n", ret, req.ifr_name); atomic_printf(" speed:%#x duplex:%#x port:%#x physaddr:%#x, maxtxpkt:%u " "maxrxpkt:%u ...\n", ethtool_cmd_speed(&etc), etc.duplex, etc.port, etc.phy_address, etc.maxtxpkt, etc.maxrxpkt); if (-1 == ret) { atomic_printf("WARNING: %s doesn't appear to support SIOCETHTOOL; the test " "may have been meaningless (%s/%d)\n", name, strerror(err), err); test_assert(EOPNOTSUPP == err || EPERM == err); } memset(&wreq, 0x5a, sizeof(wreq)); strcpy(wreq.ifr_ifrn.ifrn_name, name); ret = ioctl(sockfd, SIOCGIWRATE, &wreq); err = errno; atomic_printf("SIOCGIWRATE(ret:%d): %s:\n", ret, wreq.ifr_name); atomic_printf(" bitrate:%d (fixed? %s; disabled? %s) flags:%#x\n", wreq.u.bitrate.value, wreq.u.bitrate.fixed ? "yes" : "no", wreq.u.bitrate.disabled ? 
"yes" : "no", wreq.u.bitrate.flags); if (-1 == ret) { atomic_printf("WARNING: %s doesn't appear to be a wireless iface; " "SIOCGIWRATE test may have been meaningless (%s/%d)\n", name, strerror(err), err); test_assert(EOPNOTSUPP == err || EPERM == err); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sock_names_opts.c000066400000000000000000000047261265436462100172620ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void client(const struct sockaddr_un* addr) { int clientfd; char c; struct sockaddr_un got_peer_addr; socklen_t got_peer_addr_len = sizeof(got_peer_addr); clientfd = socket(AF_UNIX, SOCK_STREAM, 0); test_assert(clientfd >= 0); test_assert(0 == connect(clientfd, (struct sockaddr*)addr, sizeof(*addr))); test_assert(0 == getpeername(clientfd, &got_peer_addr, &got_peer_addr_len)); test_assert(got_peer_addr_len > 0 && got_peer_addr_len <= sizeof(got_peer_addr)); test_assert(0 == memcmp(&got_peer_addr, addr, got_peer_addr_len)); test_assert(1 == read(clientfd, &c, 1)); test_assert(c == '!'); exit(7); } int main(int argc, char* argv[]) { struct sockaddr_un addr; struct sockaddr_un got_name; socklen_t got_name_len = sizeof(got_name); int listenfd; int servefd; struct sockaddr_un peer_addr; socklen_t peer_addr_len = sizeof(peer_addr); pid_t child; int on = 1; int got_opt = -1; socklen_t got_opt_len = sizeof(got_opt); int status; memset(&addr, 0, sizeof(addr)); addr.sun_family = AF_UNIX; strncpy(addr.sun_path, "socket.unix", sizeof(addr.sun_path) - 1); test_assert(0 <= (listenfd = socket(AF_UNIX, SOCK_STREAM, 0))); test_assert(0 == bind(listenfd, (struct sockaddr*)&addr, sizeof(addr))); test_assert(0 == getsockname(listenfd, &got_name, &got_name_len)); test_assert(got_name_len > 0 && got_name_len <= sizeof(got_name)); test_assert(0 == memcmp(&addr, &got_name, got_name_len)); test_assert(0 == listen(listenfd, 1)); if (0 == (child = fork())) { client(&addr); test_assert("Not reached" && 0); } test_assert(0 <= (servefd = accept(listenfd, &peer_addr, &peer_addr_len))); test_assert(0 == getsockopt(servefd, SOL_SOCKET, SO_PASSCRED, &got_opt, &got_opt_len)); test_assert(got_opt_len == sizeof(got_opt)); test_assert(got_opt == 0); test_assert(0 == setsockopt(servefd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))); test_assert(0 == getsockopt(servefd, SOL_SOCKET, SO_PASSCRED, &got_opt, &got_opt_len)); test_assert(got_opt_len == sizeof(got_opt)); test_assert(got_opt == 1); test_assert(1 == write(servefd, "!", 1)); test_assert(child == waitpid(child, &status, 0)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 7); unlink(addr.sun_path); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/splice.c000066400000000000000000000026011265436462100153400ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define TOKEN "ABC" #define TOKEN_SIZE sizeof(TOKEN) static const char token_file[] = "rr-splice-file.txt"; void verify_token(int fd) { ssize_t len; char buf[TOKEN_SIZE]; len = read(fd, buf, sizeof(buf)); if (len != TOKEN_SIZE || strcmp(buf, TOKEN)) { atomic_puts("Internal error: FAILED: splice wrote the wrong data"); exit(1); } atomic_puts("Got expected token " TOKEN); } int main(void) { int pipefds[2]; int filefd; loff_t off; ssize_t nmoved; filefd = open(token_file, O_RDWR | O_CREAT | O_TRUNC, 0600); pipe2(pipefds, 0 /*no flags*/); write(pipefds[1], TOKEN, TOKEN_SIZE); off = 0; nmoved = splice(pipefds[0], NULL, filefd, 
&off, TOKEN_SIZE, 0 /*no flags*/); atomic_printf( "spliced %zd bytes from %d to %d; off changed from 0 to %" PRId64 "\n", nmoved, pipefds[0], filefd, off); lseek(filefd, 0, SEEK_SET); verify_token(filefd); off = 0; nmoved = splice(filefd, &off, pipefds[1], NULL, TOKEN_SIZE, 0 /*no flags*/); atomic_printf( "spliced %zd bytes from %d to %d; off changed from 0 to %" PRId64 "\n", nmoved, filefd, pipefds[1], off); verify_token(pipefds[0]); /* The test driver will clean up after us if the test failed * before this. */ unlink(token_file); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/stack_growth.c000066400000000000000000000007211265436462100165610ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static volatile int v = 0; static void breakpoint(void) {} static void funcall(void) { char buf[2000000]; int i; breakpoint(); for (i = 0; i < sizeof(buf); ++i) { buf[i] = (char)i; } for (i = 0; i < sizeof(buf); ++i) { v += buf[i % 777777]; } } int main(int argc, char* argv[]) { funcall(); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/stack_growth.py000066400000000000000000000004451265436462100167720ustar00rootroot00000000000000from rrutil import * send_gdb('break breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('finish') send_gdb('watch -l buf[100]') expect_gdb('Hardware[()/a-z ]+watchpoint 2') send_gdb('c') expect_gdb('Old value = 0') expect_gdb('New value = 100') ok() rr-4.1.0/src/test/stack_growth.run000066400000000000000000000000471265436462100171440ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/stack_growth_after_syscallbuf.c000066400000000000000000000013351265436462100221730ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int recurse(int count) { if (count == 0) { return 77; } return recurse(count - 1) * 99; } int main(int argc, char* argv[]) { struct rlimit limit; int fd = open("/dev/zero", O_RDONLY); char ch; test_assert(fd >= 0); test_assert(0 == getrlimit(RLIMIT_STACK, &limit)); limit.rlim_cur = RLIM_INFINITY; /* This could fail; that's OK. We just want to try to test an unlimited stack size. */ setrlimit(RLIMIT_STACK, &limit); test_assert(1 == read(fd, &ch, 1)); test_assert(1 == read(fd, &ch, 1)); atomic_printf("recurse=%d\n", recurse(10000)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/stack_overflow.c000066400000000000000000000025671265436462100171240ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int* depth; static int recurse(void) { int result; ++*depth; if (*depth > 10000000) { return 3; } result = recurse() * 13 + 1; --*depth; return result; } static void SEGV_handler(int sig, siginfo_t* si, void* context) { atomic_puts( "Should not reach SEGV handler, since there's no safe altstack to use"); exit(1); } int main(int argc, char* argv[]) { pid_t child; int status; depth = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); test_assert(depth != MAP_FAILED); child = fork(); if (!child) { /* Testing shows that the output value of |depth| is not very sensitive to small values of the limit, but it's very sensitive around the 500K mark. 
*/ struct rlimit r = { 500000, 500000 }; struct sigaction act; act.sa_sigaction = SEGV_handler; act.sa_flags = SA_SIGINFO; sigemptyset(&act.sa_mask); test_assert(0 == sigaction(SIGSEGV, &act, NULL)); test_assert(0 == setrlimit(RLIMIT_STACK, &r)); return recurse(); } atomic_printf("child %d\n", child); test_assert(wait(&status) == child); test_assert(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV); atomic_printf("depth = %d\n", *depth); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/stack_overflow_altstack.c000066400000000000000000000021021265436462100207730ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int depth = 0; static char buf[SIGSTKSZ]; static void SEGV_handler(int sig, siginfo_t* si, void* context) { atomic_printf("depth = %d\n", depth); atomic_puts("EXIT-SUCCESS"); exit(0); } static int recurse(void) { int result; ++depth; if (depth > 10000000) { return 3; } result = recurse() * 13 + 1; --depth; return result; } int main(int argc, char* argv[]) { /* Testing shows that the output value of |depth| is not very sensitive to small values of the limit, but it's very sensitive around the 500K mark. */ struct rlimit r = { 500000, 500000 }; struct sigaction act; stack_t stack; stack.ss_flags = 0; stack.ss_size = sizeof(buf); stack.ss_sp = buf; test_assert(0 == sigaltstack(&stack, NULL)); act.sa_sigaction = SEGV_handler; act.sa_flags = SA_SIGINFO | SA_ONSTACK; sigemptyset(&act.sa_mask); test_assert(0 == sigaction(SIGSEGV, &act, NULL)); test_assert(0 == setrlimit(RLIMIT_STACK, &r)); return recurse(); } rr-4.1.0/src/test/stack_overflow_debug.py000066400000000000000000000005051265436462100204660ustar00rootroot00000000000000from rrutil import * send_gdb('handle SIGKILL stop') send_gdb('c') expect_gdb('SIGSEGV') send_gdb('stepi') expect_gdb('SIGSEGV') send_gdb('stepi') expect_gdb('SIGKILL') send_gdb('reverse-stepi') expect_gdb('SIGSEGV') send_gdb('reverse-stepi') expect_gdb('SIGSEGV') send_gdb('reverse-continue') expect_gdb('stopped') ok() rr-4.1.0/src/test/stack_overflow_debug.run000066400000000000000000000003161265436462100206420ustar00rootroot00000000000000source `dirname $0`/util.sh record stack_overflow$bitness TARGET_PID=$(grep 'child ' record.out | awk '{print $2}') echo Targeting recorded pid $TARGET_PID ... 
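# stack_overflow forks a child that recurses until it takes SIGSEGV; the
# grep above pulls that child's pid out of the recording's output, and
# "-f $TARGET_PID" below asks the replayer to attach its debug server to
# that forked child rather than to the initial tracee.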
debug stack_overflow_debug "-f $TARGET_PID" rr-4.1.0/src/test/stack_overflow_with_guard.c000066400000000000000000000025001265436462100213240ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int* depth; static int recurse(void) { int result; ++*depth; if (*depth > 10000000) { return 3; } result = recurse() * 13 + 1; --*depth; return result; } static void SEGV_handler(int sig, siginfo_t* si, void* context) { atomic_puts( "Should not reach SEGV handler, since there's no safe altstack to use"); exit(1); } int main(int argc, char* argv[]) { pid_t child; int status; depth = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); test_assert(depth != MAP_FAILED); child = fork(); if (!child) { struct sigaction act; int* fake_sp = &argc; act.sa_sigaction = SEGV_handler; act.sa_flags = SA_SIGINFO; sigemptyset(&act.sa_mask); test_assert(0 == sigaction(SIGSEGV, &act, NULL)); void* p = (void*)((size_t)(fake_sp - 8 * PAGE_SIZE) & ~(size_t)(PAGE_SIZE - 1)); test_assert(mmap(p, PAGE_SIZE, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0) == p); return recurse(); } test_assert(wait(&status) == child); test_assert(WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV); atomic_printf("depth = %d\n", *depth); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/statfs.c000066400000000000000000000034751265436462100153770ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define DUMMY_FILENAME "foo.txt" static void dump_statfs(const char* label, const struct statfs* s) { atomic_printf("%s: {\n" " type:0x%lx, bsize:%ld, \n" " blocks:%ld, bfree:%ld, bavail:%ld,\n" " files:%lu, ffree:%lu,\n" " fsid: { %d, %d },\n" " namelen:%ld, frsize:%ld,\n" " flags:0x%lx\n" "}\n", label, (long)s->f_type, (long)s->f_bsize, s->f_blocks, s->f_bfree, s->f_bavail, s->f_files, s->f_ffree, s->f_fsid.__val[0], s->f_fsid.__val[1], (long)s->f_namelen, (long)s->f_frsize, (long)s->f_flags); } static int same_statfs_det(const struct statfs* s1, const struct statfs* s2) { /* Only compare the ~deterministic members; the free/avail * resource members can change in between calls. 
*/ return (s1->f_type == s2->f_type && s1->f_bsize == s2->f_bsize && s1->f_blocks == s2->f_blocks && s1->f_fsid.__val[0] == s2->f_fsid.__val[0] && s1->f_fsid.__val[1] == s2->f_fsid.__val[1] && s1->f_namelen == s2->f_namelen && s1->f_frsize == s2->f_frsize && s1->f_flags == s2->f_flags); } int main(void) { int fd; struct statfs* sfs1; struct statfs* sfs2; ALLOCATE_GUARD(sfs1, 0); ALLOCATE_GUARD(sfs2, 1); fd = creat(DUMMY_FILENAME, 0600); test_assert(fd >= 0); test_assert(0 == statfs(DUMMY_FILENAME, sfs1)); test_assert(0 == fstatfs(fd, sfs2)); VERIFY_GUARD(sfs1); VERIFY_GUARD(sfs2); dump_statfs("statfs buffer", sfs1); dump_statfs("fstatfs buffer", sfs2); test_assert(same_statfs_det(sfs1, sfs2)); unlink(DUMMY_FILENAME); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/stdout_child.c000066400000000000000000000006761265436462100165600ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t child; int status; if (argc == 2) { atomic_puts("EXIT-SUCCESS"); return 77; } if (0 == (child = fork())) { execl(argv[0], argv[0], "step2", NULL); } test_assert(child == waitpid(child, &status, 0)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 77); return 0; } rr-4.1.0/src/test/stdout_cloexec.c000066400000000000000000000014561265436462100171140ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t child; int status; if (argc == 2) { /* With syscallbuf disabled, this should open on fd 1. Then, the following puts will succeed, but rr should not echo to the terminal during replay, as long as our CLOEXEC handling works. */ open("/dev/null", O_WRONLY); atomic_puts("FAILED: this output should be hidden"); return 77; } if (0 == (child = fork())) { test_assert(0 == fcntl(STDOUT_FILENO, F_SETFD, FD_CLOEXEC)); execl(argv[0], argv[0], "step2", NULL); } test_assert(child == waitpid(child, &status, 0)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 77); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/stdout_dup.c000066400000000000000000000004071265436462100162550ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int fd = dup(STDOUT_FILENO); static const char msg[] = "EXIT-SUCCESS\n"; write(fd, msg, sizeof(msg) - 1); return 0; } rr-4.1.0/src/test/stdout_redirect.c000066400000000000000000000011641265436462100172670ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { pid_t child; int status; if (argc == 2) { atomic_puts("FAILED: this output should be hidden"); return 77; } if (0 == (child = fork())) { int fd = open("/dev/null", O_WRONLY); test_assert(fd >= 0); test_assert(STDOUT_FILENO == dup2(fd, STDOUT_FILENO)); execl(argv[0], argv[0], "step2", NULL); } test_assert(child == waitpid(child, &status, 0)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 77); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/step1.py000066400000000000000000000005541265436462100153300ustar00rootroot00000000000000from rrutil import * send_gdb('b A') expect_gdb('Breakpoint 1') send_gdb('c') expect_rr('calling A') expect_gdb('Breakpoint 1, A') send_gdb('n') expect_rr('calling B') send_gdb('s') expect_gdb('B ()') send_gdb('n') expect_rr('calling C') 
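# Step into C() the same way, then make sure the backtrace below shows the
# whole call chain innermost-first: C called by B called by A, with main
# at the bottom.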
send_gdb('s') expect_gdb('C ()') send_gdb('bt') expect_gdb('#0[^C]+C[^#]+#1[^B]+B[^#]+#2[^A]+A[^#]+#3[^m]+main') ok() rr-4.1.0/src/test/step1.run000066400000000000000000000001021265436462100154710ustar00rootroot00000000000000source `dirname $0`/util.sh record breakpoint$bitness debug step1 rr-4.1.0/src/test/step_rdtsc.py000066400000000000000000000007241265436462100164450ustar00rootroot00000000000000import re from rrutil import * send_gdb('b rdtsc') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1, rdtsc') send_gdb('disass') expect_gdb(re.compile(r'=> ([0-9a-fx]+) <\+[0-9]+>:\trdtsc')) addr = last_match().group(1) send_gdb('stepi') send_gdb('disass') expect_gdb(re.compile(r'=> ([0-9a-fx]+) ')) addr2 = last_match().group(1) if eval(addr) + 2 != eval(addr2): failed("stepi from rdtsc at %s ended at incorrect %s" % (addr, addr2)); ok() rr-4.1.0/src/test/step_rdtsc.run000066400000000000000000000001021265436462100166070ustar00rootroot00000000000000source `dirname $0`/util.sh record rdtsc$bitness debug step_rdtsc rr-4.1.0/src/test/step_signal.py000066400000000000000000000005741265436462100166060ustar00rootroot00000000000000from rrutil import * import re send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1, breakpoint') send_gdb('fin') expect_gdb(r'signal\(i, handle_sigrt\)') send_gdb('n') expect_gdb(r'raise\(i\)') send_gdb('n') expect_gdb('Program received signal SIG34') send_gdb('stepi') send_gdb('n') expect_gdb(r'atomic_printf\("Caught signal') ok() rr-4.1.0/src/test/step_signal.run000066400000000000000000000001031265436462100167460ustar00rootroot00000000000000source `dirname $0`/util.sh record sigrt$bitness debug step_signal rr-4.1.0/src/test/step_thread.c000066400000000000000000000021651265436462100163700ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" pthread_barrier_t bar; /* NB: these must *not* be macros so that debugger step-next works as * expected per the program source. 
*/ static void A(void) { pthread_barrier_wait(&bar); pthread_barrier_wait(&bar); } static void B(void) { pthread_barrier_wait(&bar); pthread_barrier_wait(&bar); } static void* threadA(void* unused) { A(); return NULL; } static void* threadB(void* unused) { B(); return NULL; } static void C(void) { pthread_barrier_wait(&bar); } static void hit_barrier(void) { int break_here = 1; (void)break_here; atomic_puts("hit barrier"); } static void ready(void) { int break_here = 1; (void)break_here; } int main(void) { void* dummy; pthread_t a, b; pthread_barrier_init(&bar, NULL, 3); dummy = pthread_create; dummy = pthread_barrier_wait; (void)dummy; ready(); pthread_create(&a, NULL, threadA, NULL); pthread_create(&b, NULL, threadB, NULL); C(); hit_barrier(); pthread_barrier_wait(&bar); pthread_join(a, NULL); pthread_join(b, NULL); return 0; } rr-4.1.0/src/test/step_thread.py000066400000000000000000000031421265436462100165720ustar00rootroot00000000000000import re from rrutil import * send_gdb('b hit_barrier') expect_gdb('Breakpoint 1') send_gdb('b ready') expect_gdb('Breakpoint 2') send_gdb('c') expect_gdb('Breakpoint 2, ready') bps = set(('A', 'B', 'C')) for bp in bps: send_gdb('b '+ bp +'') expect_gdb('Breakpoint \d') expect_gdb(r'\(rr\)') hit_bps = { 'A': 0, 'B': 0, 'C': 0 } events = [ re.compile(r'Breakpoint 1, hit_barrier'), re.compile(r'Breakpoint \d, ([ABC])'), re.compile(r'Remote connection closed'), re.compile(r'\(rr\)') ] while 1: send_gdb('s') i = expect_list(events) if 0 == i: break if 2 == i: assert False, 'Program stopped unexpectedly, review gdb_rr.log' if 3 == i: continue bp = last_match().group(1) assert not hit_bps[bp] hit_bps[bp] = 1 expect_gdb(r'\(rr\)') for bp in hit_bps.iterkeys(): assert hit_bps[bp] arch = get_exe_arch() # The locations the threads are stopped at depends on the architecture. 
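# The two worker threads are parked in pthread_barrier_wait (or in the
# futex/syscall-entry path underneath it), so gdb may report their stop
# location as a libpthread symbol, an rr raw-syscall stub, or a bare hex
# address with no symbol at all; the per-arch regex lists below accept
# each of the forms that have been observed.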
stopped_locations = { # on i386, we sometimes stop in the middle of nowhere 'i386': ['(0x[0-9a-f]+ in )?__kernel_vsyscall', '(0x[0-9a-f]+ in )?_traced_raw_syscall', '0x[0-9a-f]+ in \?\?', '(0x[0-9a-f]+ in )?__lll_lock_wait', '(0x[0-9a-f]+ in )?pthread_barrier_wait'], 'i386:x86-64': ['(0x[0-9a-f]+ in )?__lll_lock_wait', '(0x[0-9a-f]+ in )?pthread_barrier_wait', '0x70000010 in \?\?'], } location_regex = '|'.join(stopped_locations[arch]) send_gdb('info threads') expect_gdb(r'3\s+Thread.+?(?:%s)' % location_regex) expect_gdb(r'2\s+Thread.+?(?:%s)' % location_regex) expect_gdb(r'1\s+Thread.+hit_barrier') ok() rr-4.1.0/src/test/step_thread.run000066400000000000000000000000471265436462100167470ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/stray_time_slice_signal.c000066400000000000000000000011561265436462100207610ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int i; int fd = open("/dev/zero", O_RDONLY); int sum = 0; pid_t pid; int status; test_assert(fd >= 0); pid = fork(); if (!pid) { pid_t pp = getppid(); for (i = 0; i < 1000; ++i) { kill(pp, SIGCHLD); } return 77; } for (i = 0; i < 1000; ++i) { int j; for (j = 0; j < i % 50; ++j) { sum += j * i; } } test_assert(pid == wait(&status)); atomic_printf("token = %d\n", sum); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/stray_time_slice_signal.run000066400000000000000000000001121265436462100213320ustar00rootroot00000000000000source `dirname $0`/util.sh RECORD_ARGS="-c3" compare_test EXIT-SUCCESS rr-4.1.0/src/test/strict_priorities.c000066400000000000000000000017531265436462100176510ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #include <pthread.h> #include <sys/resource.h> #include <sys/time.h> #define NUM_ITERATIONS (1 << 30) static volatile int main_thread_done = 0; static void* low_priority_func(void* unused) { setpriority(PRIO_PROCESS, 0, 4); /* This thread should never be scheduled again unless/until the main thread exits.
*/ test_assert(main_thread_done); return NULL; } int main(int argc, char* argv[]) { int i, j; int dummy = 0; pthread_t low_priority_thread; pthread_create(&low_priority_thread, NULL, low_priority_func, NULL); /* Eat some CPU and do some (nonblocking) system calls */ for (i = 0; i < 64; ++i) { getpid(); for (j = 0; j < NUM_ITERATIONS / 64; ++j) { dummy += j % (1 << 20); dummy += j % (79 * (1 << 20)); } } /* Set this before the puts below since the puts could block */ main_thread_done = 1; atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/string_instructions.c000066400000000000000000000164501265436462100202220ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define SIZE 10 * 1024 * 1024 #define FORWARD 1 #define BACKWARD -1 static char* p; static char* q; static char* r; static uintptr_t to_uintptr(char* val) { union { char buf[9]; uintptr_t p; } u; memset(u.buf, 0, sizeof(u.buf)); strcpy(u.buf, val); return u.p; } static inline void set_dir(int dir) { #if defined(__i386__) || defined(__x86_64__) if (dir < 0) { __asm__("std\n\t"); } else { __asm__("cld\n\t"); } #endif } static void string_store(char* dest, uintptr_t a, uintptr_t size, int unit, int dir) { set_dir(dir); #if defined(__i386__) || defined(__x86_64__) switch (unit) { case 1: __asm__("rep stosb\n\t" ::"a"(a), "c"(size), "D"(dest)); break; case 2: __asm__("rep stosw\n\t" ::"a"(a), "c"(size), "D"(dest)); break; case 4: __asm__("rep stosl\n\t" ::"a"(a), "c"(size), "D"(dest)); break; #ifdef __x86_64__ case 8: __asm__("rep stosq\n\t" ::"a"(a), "c"(size), "D"(dest)); break; #endif } #else int i; for (i = 0; i < size; i += unit) { memcpy(dest, &a, unit); dest += dir; } #endif set_dir(1); } static void string_copy(char* dest, char* src, uintptr_t size, int unit, int dir) { set_dir(dir); #if defined(__i386__) || defined(__x86_64__) switch (unit) { case 1: __asm__("rep movsb\n\t" ::"S"(src), "c"(size), "D"(dest)); break; case 2: __asm__("rep movsw\n\t" ::"S"(src), "c"(size), "D"(dest)); break; case 4: __asm__("rep movsl\n\t" ::"S"(src), "c"(size), "D"(dest)); break; #ifdef __x86_64__ case 8: __asm__("rep movsq\n\t" ::"S"(src), "c"(size), "D"(dest)); break; #endif } #else int i; for (i = 0; i < size; i += unit) { memcpy(dest, src, unit); dest += dir; src += dir; } #endif set_dir(1); } static int string_scan_equal(char* s, uintptr_t a, uintptr_t size, int unit, int dir) { char* end = s; set_dir(dir); #if defined(__i386__) || defined(__x86_64__) switch (unit) { case 1: __asm__("repe scasb\n\t" : "=D"(end) : "a"(a), "D"(s), "c"(size)); break; case 2: __asm__("repe scasw\n\t" : "=D"(end) : "a"(a), "D"(s), "c"(size)); break; case 4: __asm__("repe scasl\n\t" : "=D"(end) : "a"(a), "D"(s), "c"(size)); break; #ifdef __x86_64__ case 8: __asm__("repe scasq\n\t" : "=D"(end) : "a"(a), "D"(s), "c"(size)); break; #endif } #else int i; for (i = 0; i < size; i += unit) { end += dir; if (memcmp(end - dir, &a, unit) != 0) { break; } } #endif set_dir(1); return (end - s - dir) / dir; } static int string_scan_not_equal(char* s, uintptr_t a, uintptr_t size, int unit, int dir) { char* end = s; set_dir(dir); #if defined(__i386__) || defined(__x86_64__) switch (unit) { case 1: __asm__("repne scasb\n\t" : "=D"(end) : "a"(a), "D"(s), "c"(size)); break; case 2: __asm__("repne scasw\n\t" : "=D"(end) : "a"(a), "D"(s), "c"(size)); break; case 4: __asm__("repne scasl\n\t" : "=D"(end) : "a"(a), "D"(s), "c"(size)); break; #ifdef __x86_64__ case 8: __asm__("repne scasq\n\t" : "=D"(end) : 
"a"(a), "D"(s), "c"(size)); break; #endif } #else int i; for (i = 0; i < size; i += unit) { end += dir; if (memcmp(end - dir, &a, unit) == 0) { break; } } #endif set_dir(1); return (end - s - dir) / dir; } static int string_cmp_equal(char* s, char* t, uintptr_t size, int unit, int dir) { char* sp = s; char* tp = t; set_dir(dir); #if defined(__i386__) || defined(__x86_64__) switch (unit) { case 1: __asm__("repe cmpsb\n\t" : "=D"(sp) : "S"(tp), "D"(s), "c"(size)); break; case 2: __asm__("repe cmpsw\n\t" : "=D"(sp) : "S"(tp), "D"(s), "c"(size)); break; case 4: __asm__("repe cmpsl\n\t" : "=D"(sp) : "S"(tp), "D"(s), "c"(size)); break; #ifdef __x86_64__ case 8: __asm__("repe cmpsq\n\t" : "=D"(sp) : "S"(tp), "D"(s), "c"(size)); break; #endif } #else int i; for (i = 0; i < size; i += unit) { sp += dir; tp += dir; if (memcmp(sp - dir, tp - dir, unit) != 0) { break; } } #endif set_dir(1); return (sp - s - dir) / dir; } static int string_cmp_not_equal(char* s, char* t, uintptr_t size, int unit, int dir) { char* sp = s; char* tp = t; set_dir(dir); #if defined(__i386__) || defined(__x86_64__) switch (unit) { case 1: __asm__("repne cmpsb\n\t" : "=D"(sp) : "S"(tp), "D"(s), "c"(size)); break; case 2: __asm__("repne cmpsw\n\t" : "=D"(sp) : "S"(tp), "D"(s), "c"(size)); break; case 4: __asm__("repne cmpsl\n\t" : "=D"(sp) : "S"(tp), "D"(s), "c"(size)); break; #ifdef __x86_64__ case 8: __asm__("repne cmpsq\n\t" : "=D"(sp) : "S"(tp), "D"(s), "c"(size)); break; #endif } #else int i; for (i = 0; i < size; i += unit) { sp += dir; tp += dir; if (memcmp(sp - dir, tp - dir, unit) == 0) { break; } } #endif set_dir(1); return (sp - s - dir) / dir; } int main(int argc, char* argv[]) { int u; uintptr_t pattern = to_uintptr("aaaaaaaa"); uintptr_t pattern2 = to_uintptr("bbbbbbbb"); p = malloc(SIZE); q = malloc(SIZE); r = malloc(SIZE); for (u = 0; u < (sizeof(void*) == 8 ? 
4 : 3); ++u) { int unit = 1 << u; int dir = FORWARD * unit; int size_units = SIZE / unit; int ret; memset(p, 0, SIZE); memset(q, 0, SIZE); memset(r, 0, SIZE); string_store(p, pattern, size_units, unit, dir); test_assert(memcmp(&p[SIZE - unit], &pattern, unit) == 0); string_copy(q, p, size_units, unit, dir); test_assert(memcmp(&q[SIZE - unit], &pattern, unit) == 0); memcpy(&p[SIZE - unit], &pattern2, unit); ret = string_scan_equal(p, pattern, size_units, unit, dir); test_assert(ret == size_units - 1); ret = string_scan_not_equal(p, pattern2, size_units, unit, dir); test_assert(ret == size_units - 1); ret = string_cmp_equal(p, q, size_units, unit, dir); test_assert(ret == size_units - 1); memset(&p[SIZE - unit], 0, unit); ret = string_cmp_not_equal(p, r, size_units, unit, dir); test_assert(ret == size_units - 1); dir = BACKWARD * unit; string_store(p + SIZE - unit, pattern2, size_units, unit, dir); test_assert(memcmp(&p[0], &pattern2, unit) == 0); string_copy(q + SIZE - unit, p + SIZE - unit, size_units, unit, dir); test_assert(memcmp(&q[0], &pattern2, unit) == 0); memcpy(&p[0], &pattern, unit); ret = string_scan_equal(p + SIZE - unit, pattern2, size_units, unit, dir); test_assert(ret == size_units - 1); ret = string_scan_not_equal(p + SIZE - unit, pattern, size_units, unit, dir); test_assert(ret == size_units - 1); ret = string_cmp_equal(p + SIZE - unit, q + SIZE - unit, size_units, unit, dir); test_assert(ret == size_units - 1); memset(&p[0], 0, unit); ret = string_cmp_not_equal(p + SIZE - unit, r + SIZE - unit, size_units, unit, dir); test_assert(ret == size_units - 1); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/string_instructions.py000066400000000000000000000037421265436462100204300ustar00rootroot00000000000000from rrutil import * send_gdb('break string_store') expect_gdb('Breakpoint 1') # string_store 1-byte forwards send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('finish') send_gdb('watch -l p[1000000]') expect_gdb('watchpoint') send_gdb('reverse-continue') expect_gdb('Old value = 97') expect_gdb('New value = 0') send_gdb('p p[999999]') expect_gdb('= 97') send_gdb('p p[1000000]') expect_gdb('= 0') send_gdb('p p[1000001]') expect_gdb('= 0') send_gdb('disable') send_gdb('enable 1') # string_store 1-byte backwards send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('finish') send_gdb('watch -l p[1000000]') expect_gdb('watchpoint') send_gdb('reverse-continue') expect_gdb('Old value = 98') expect_gdb('New value = 97') send_gdb('p p[1000001]') expect_gdb('= 98') send_gdb('p p[1000000]') expect_gdb('= 97') send_gdb('p p[999999]') expect_gdb('= 97') send_gdb('disable') send_gdb('enable 1') # string_store 2-bytes forwards send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('finish') send_gdb('watch -l p[1000001]') expect_gdb('watchpoint') send_gdb('reverse-continue') expect_gdb('Old value = 97') expect_gdb('New value = 0') send_gdb('p p[999999]') expect_gdb('= 97') send_gdb('p p[1000000]') expect_gdb('= 0') send_gdb('p p[1000001]') expect_gdb('= 0') send_gdb('disable') send_gdb('enable 1') # string_store 2-bytes backwards # Check that a watch at the end of the loop is OK send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('finish') send_gdb('watch -l p[0]') expect_gdb('watchpoint') send_gdb('reverse-continue') expect_gdb('Old value = 98') expect_gdb('New value = 97') send_gdb('p p[0]') expect_gdb('= 97') send_gdb('disable') send_gdb('enable 1') # string_store 4-bytes forwards # Just check that the late-watchpoint quirk is suppressed send_gdb('c') expect_gdb('Breakpoint 1') 
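# The 4-byte rep-stosl fills p[800000..800003] in a single iteration, so
# when the watchpoint on p[800000] fires, the whole dword should already
# read as 'a' (97) while the next byte, p[800004], is still zero.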
send_gdb('watch -l p[800000]') expect_gdb('watchpoint') send_gdb('continue') expect_gdb('Old value = 0') expect_gdb('New value = 97') send_gdb('p p[800003]') expect_gdb('= 97') send_gdb('p p[800004]') expect_gdb('= 0') ok() rr-4.1.0/src/test/string_instructions.run000066400000000000000000000000471265436462100205770ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/string_instructions_break.py000066400000000000000000000006261265436462100215720ustar00rootroot00000000000000from rrutil import * send_gdb('break string_store') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('watch -l p[0]') expect_gdb('watchpoint 2') send_gdb('c') expect_gdb('watchpoint 2') send_gdb('break') expect_gdb('Breakpoint 3') send_gdb('disable 3') send_gdb('finish') expect_gdb('main') send_gdb('enable 3') send_gdb('reverse-continue') expect_gdb('Breakpoint 3') ok() rr-4.1.0/src/test/string_instructions_break.run000066400000000000000000000001471265436462100217440ustar00rootroot00000000000000source `dirname $0`/util.sh record string_instructions_replay$bitness debug string_instructions_break rr-4.1.0/src/test/string_instructions_replay.c000066400000000000000000000025721265436462100215760ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define SIZE 10 * 1024 * 1024 #define DIFF 2 * 1024 * 1024 #define CMP_LEN 3 * 1024 * 1024 static char* p; static char* q; static void string_store(char* dest, int a, uintptr_t size) { #if defined(__i386__) || defined(__x86_64__) __asm__("rep stosb\n\t" ::"a"(a), "c"(size), "D"(dest)); #else memset(dest, a, size); #endif } static int string_compare(char* s1, char* s2, uintptr_t size) { #if defined(__i386__) || defined(__x86_64__) char* result; __asm__("repe cmpsb\n\t" : "=D"(result) : "c"(size), "S"(s1), "D"(s2)); uintptr_t i = result - s2; if (i == size) { return s1[size - 1] == s2[size - 1] ? size : size - 1; } return i - 1; #else for (uintptr_t i = 0; i < size; ++i) { if (s1[i] != s2[i]) { return i; } } return size; #endif } int main(int argc, char* argv[]) { int i; p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); test_assert(p != MAP_FAILED); q = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); test_assert(q != MAP_FAILED); for (i = 1; i < 1000; ++i) { string_store(p, i, SIZE); string_store(q, i, SIZE); q[DIFF] = i ^ 0xff; test_assert(string_compare(p, q, SIZE) == DIFF); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/string_instructions_replay.run000066400000000000000000000005221265436462100221510ustar00rootroot00000000000000source `dirname $0`/util.sh record $TESTNAME & for i in $(seq 1 30); do sleep 0.05 kill -CHLD $rrpid $(pidof $TESTNAME-$nonce) >& /dev/null done # Wait for 'record' to actually terminate. Otherwise we might start # replaying before the trace file has been completely written. wait echo "Replaying ..." 
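# Recording was peppered with SIGCHLD above, interrupting the rep-prefixed
# loops at arbitrary points; replay has to re-deliver each of those signals
# at the identical point in the string instruction, otherwise the final
# string_compare results diverge and EXIT-SUCCESS never prints.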
replay check 'EXIT-SUCCESS' rr-4.1.0/src/test/string_instructions_replay_quirk.py000066400000000000000000000005271265436462100232150ustar00rootroot00000000000000from rrutil import * send_gdb('break string_store') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('disable') send_gdb('watch -l p[0]') expect_gdb('watchpoint') send_gdb('c') expect_gdb('Old value = 0') expect_gdb('New value = 1') send_gdb('p p[0]') expect_gdb('= 1') send_gdb('p p[1]') expect_gdb('= 0') ok() rr-4.1.0/src/test/string_instructions_replay_quirk.run000066400000000000000000000001551265436462100233660ustar00rootroot00000000000000source `dirname $0`/util.sh record string_instructions_replay$bitness debug string_instructions_replay_quirk rr-4.1.0/src/test/string_instructions_watch.c000066400000000000000000000026031265436462100214030ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static char* buf; static uintptr_t to_uintptr(char* val) { union { char buf[9]; uintptr_t p; } u; memset(u.buf, 0, sizeof(u.buf)); strcpy(u.buf, val); return u.p; } static inline void set_dir(int dir) { #if defined(__i386__) || defined(__x86_64__) if (dir < 0) { __asm__("std\n\t"); } else { __asm__("cld\n\t"); } #endif } static void string_store(char* dest, uintptr_t a, uintptr_t size, int unit, int dir) { set_dir(dir); #if defined(__i386__) || defined(__x86_64__) switch (unit) { case 1: __asm__("rep stosb\n\t" ::"a"(a), "c"(size), "D"(dest)); break; case 2: __asm__("rep stosw\n\t" ::"a"(a), "c"(size), "D"(dest)); break; case 4: __asm__("rep stosl\n\t" ::"a"(a), "c"(size), "D"(dest)); break; #ifdef __x86_64__ case 8: __asm__("rep stosq\n\t" ::"a"(a), "c"(size), "D"(dest)); break; #endif } #else int i; for (i = 0; i < size; i += unit) { memcpy(dest, &a, unit); dest += dir; } #endif set_dir(1); } int main(int argc, char* argv[]) { buf = (char*)mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); string_store(buf, to_uintptr("aaaaaaaa"), 16, 1, 1); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/string_instructions_watch.py000066400000000000000000000006451265436462100216150ustar00rootroot00000000000000from rrutil import * import re send_gdb('b string_store') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('p buf') expect_gdb(re.compile(r'= ([^ ]+)')) buf = eval(last_match().group(1)); send_gdb('watch -l *(uint16_t*)%d'%(buf + 13)) expect_gdb('atchpoint 2') send_gdb('c') expect_gdb('atchpoint 2') send_gdb('p buf[13]') expect_gdb('= 97') send_gdb('p buf[14]') expect_gdb('= 0') ok() rr-4.1.0/src/test/string_instructions_watch.run000066400000000000000000000000471265436462100217650ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/subprocess_exit_ends_session.py000066400000000000000000000010411265436462100222610ustar00rootroot00000000000000import re from rrutil import * BAD_TOKEN = r'EXIT-SUCCESS' GOOD_TOKEN = r'Inferior 1 \(process \d+\) exited normally' def observe_child_crash_and_exit(): expect_gdb('Program received signal SIGSEGV') send_gdb('c') for line in iterlines_both(): m = re.search(BAD_TOKEN, line) if m: failed('Saw illegal token "'+ BAD_TOKEN +'"') m = re.search(GOOD_TOKEN, line) if m: return send_gdb('c') observe_child_crash_and_exit() restart_replay() observe_child_crash_and_exit() ok() rr-4.1.0/src/test/subprocess_exit_ends_session.run000066400000000000000000000002541265436462100224420ustar00rootroot00000000000000source `dirname 
$0`/util.sh record fork_child_crash$bitness TARGET_PID=$(grep 'child ' record.out | awk '{print $2}') debug subprocess_exit_ends_session "-f $TARGET_PID" rr-4.1.0/src/test/switch_processes.py000066400000000000000000000004401265436462100176550ustar00rootroot00000000000000from rrutil import * import re # Restart at the first debuggable event, which will be in a different # process! We should stay focused on the child process, instead of # trying to switch to that process. At least we shouldn't crash. restart_replay(1) expect_gdb('exited normally') ok() rr-4.1.0/src/test/switch_processes.run000066400000000000000000000002521265436462100200320ustar00rootroot00000000000000source `dirname $0`/util.sh save_exe write_race$bitness saved_exe="write_race$bitness-$nonce" record target_process$bitness $saved_exe debug switch_processes "-g 1000" rr-4.1.0/src/test/switch_read.c000066400000000000000000000025661265436462100163670ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static const char start_token = '!'; static const char sentinel_token = ' '; static pthread_t reader; static pthread_barrier_t barrier; static int sockfds[2]; static void* reader_thread(void* dontcare) { int readsock = sockfds[1]; char c = sentinel_token; struct timeval tv; pthread_barrier_wait(&barrier); atomic_puts("r: blocking on read ..."); test_assert(1 == read(readsock, &c, sizeof(c))); gettimeofday(&tv, NULL); atomic_printf("r: ... read '%c'\n", c); test_assert(c == start_token); return NULL; } int main(int argc, char* argv[]) { char token = start_token; struct timeval ts; /* (Kick on the syscallbuf if it's enabled.) */ gettimeofday(&ts, NULL); socketpair(AF_LOCAL, SOCK_STREAM, 0, sockfds); pthread_barrier_init(&barrier, NULL, 2); pthread_create(&reader, NULL, reader_thread, NULL); pthread_barrier_wait(&barrier); /* Force a blocked read() that's interrupted by a SIGUSR1, * which then itself blocks on read() and succeeds. */ atomic_puts("M: sleeping ..."); usleep(500000); atomic_printf("M: finishing reader by writing '%c' to socket ...\n", token); write(sockfds[0], &token, sizeof(token)); ++token; atomic_puts("M: ... 
done"); pthread_join(reader, NULL); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sync.c000066400000000000000000000005431265436462100150400ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define FILENAME "foo.txt" int main(void) { int fd; sync(); fd = open(FILENAME, O_CREAT | O_RDWR, 0600); test_assert(0 == unlink(FILENAME)); test_assert(fd >= 0); test_assert(0 == syncfs(fd)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/syscallbuf_fd_disabling.c000066400000000000000000000004001265436462100207100ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { atomic_puts("Line 1"); atomic_puts("Line 2"); atomic_puts("Line 3"); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/syscallbuf_fd_disabling.run000066400000000000000000000003361265436462100213020ustar00rootroot00000000000000source `dirname $0`/util.sh GLOBAL_OPTIONS="$GLOBAL_OPTIONS -M" record $TESTNAME if [[ "record.out" != $(grep -l '^\[.*\]Line 2$' record.out) ]]; then failed "Missing line annotation" exit fi compare_test EXIT-SUCCESS rr-4.1.0/src/test/syscallbuf_signal_reset.c000066400000000000000000000007321265436462100207720ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void sighandler(int sig) { /* Must be a syscall we've already executed, otherwise patching gets in the * way */ open("/dev/zero", O_RDONLY); atomic_puts("EXIT-SUCCESS"); exit(0); } int main(int argc, char* argv[]) { char ch; int fd = open("/dev/zero", O_RDONLY); signal(SIGSEGV, sighandler); read(fd, &ch, 1); *(int*)0 = 0; return 0; } rr-4.1.0/src/test/syscallbuf_timeslice.c000066400000000000000000000006061265436462100202710ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char** argv) { int fd; char buf[10]; int i; fd = open("/dev/zero", O_RDONLY); for (i = 0; i < 1 << 12; ++i) { read(fd, buf, sizeof(buf)); if (!(i & ((1 << 8) - 1))) { atomic_printf("."); } } atomic_puts("\nEXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/syscallbuf_timeslice2.c000066400000000000000000000006271265436462100203560ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char** argv) { int fd; char buf[10]; int i; for (i = 0; i < 1 << 12; ++i) { fd = open("/dev/zero", O_RDONLY); read(fd, buf, sizeof(buf)); close(fd); if (!(i & ((1 << 8) - 1))) { atomic_printf("."); } } atomic_puts("\nEXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/syscallbuf_timeslice2_250.run000066400000000000000000000003661265436462100213260ustar00rootroot00000000000000source `dirname $0`/util.sh # It's relatively easy to reproduce a CPUID divergence caused by lack # of CPU binding. GLOBAL_OPTIONS="$GLOBAL_OPTIONS_BIND_CPU" RECORD_ARGS="-c250" record syscallbuf_timeslice2$bitness replay check 'EXIT-SUCCESS' rr-4.1.0/src/test/syscallbuf_timeslice_250.run000066400000000000000000000003651265436462100212430ustar00rootroot00000000000000source `dirname $0`/util.sh # It's relatively easy to reproduce a CPUID divergence caused by lack # of CPU binding. 
GLOBAL_OPTIONS="$GLOBAL_OPTIONS_BIND_CPU" RECORD_ARGS="-c250" record syscallbuf_timeslice$bitness replay check 'EXIT-SUCCESS' rr-4.1.0/src/test/sysconf.c000066400000000000000000000010221265436462100155410ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { long pagesize = sysconf(_SC_PAGESIZE); long ncpus = sysconf(_SC_NPROCESSORS_ONLN); atomic_printf("sysconf says page size is %ld bytes\n", pagesize); test_assert(4096 == pagesize); atomic_printf("sysconf says %ld processors are online\n", ncpus); /* TODO: change this when rr supports parallel recording. */ test_assert(1 == ncpus); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sysctl.c000066400000000000000000000012711265436462100154040ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #include int main(int argc, char* argv[]) { int name[2] = { CTL_KERN, KERN_RTSIGMAX }; int sig_max = -1; size_t len = sizeof(sig_max); name[0] = CTL_KERN; name[1] = KERN_RTSIGMAX; if (sysctl(name, 2, &sig_max, &len, NULL, 0) == -1) { /* many kernels don't support this */ atomic_printf("sysctl KERN_RTSIGMAX returned errno %d\n", errno); atomic_puts("EXIT-SUCCESS"); } else { assert(len == sizeof(sig_max)); atomic_printf("sysctl KERN_RTSIGMAX returned %d\n", sig_max); assert(sig_max > 0); atomic_puts("EXIT-SUCCESS"); } return 0; } rr-4.1.0/src/test/sysemu_singlestep.c000066400000000000000000000011051265436462100176410ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int var = 41; #ifdef __i386__ __asm__ __volatile__("int $0x80\n\t" "incl %0\n\t" : "=m"(var) : "a"(SYS_gettid)); #elif defined(__x86_64__) __asm__ __volatile__("syscall\n\t" "incl %0\n\t" : "=m"(var) : "a"(SYS_gettid)); #endif test_assert(var == 42); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/sysinfo.c000066400000000000000000000005461265436462100155610ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { struct sysinfo* info; ALLOCATE_GUARD(info, 0); test_assert(0 == sysinfo(info)); test_assert(info->mem_unit > 0); test_assert(info->procs > 0); VERIFY_GUARD(info); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/target_fork.c000066400000000000000000000013401265436462100163670ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void bad_breakpoint(void) { int break_here = 1; (void)break_here; } static void good_breakpoint(void) { int break_here = 1; (void)break_here; } int main(int argc, char** argv) { int num_syscalls; int child; int i; bad_breakpoint(); test_assert(argc == 2); num_syscalls = atoi(argv[1]); atomic_printf("%d: running %d syscalls ...\n", getpid(), num_syscalls); for (i = 0; i < num_syscalls; ++i) { geteuid(); } if (0 == (child = fork())) { good_breakpoint(); exit(0); } atomic_printf("child %d\n", child); waitpid(child, NULL, 0); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/target_fork.run000066400000000000000000000003321265436462100167510ustar00rootroot00000000000000source `dirname $0`/util.sh EVENTS=1000 record $TESTNAME $EVENTS TARGET_PID=$(grep 'child ' record.out | awk '{print $2}') echo Targeting recorded pid $TARGET_PID ... 
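# "-f $TARGET_PID" attaches the debug server to the forked child and
# "-g $EVENTS" fast-forwards replay past the geteuid() spam to that event
# number first, so the gdb script should stop at good_breakpoint in the
# child and never see bad_breakpoint, which is hit before the target event.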
debug bad_good_break "-f $TARGET_PID -g $EVENTS" rr-4.1.0/src/test/target_process.c000066400000000000000000000010331265436462100171060ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char** argv) { const char* exe_image; int child; test_assert(argc == 2); exe_image = argv[1]; atomic_printf("%d: forking and exec'ing %s...\n", getpid(), exe_image); if (0 == (child = fork())) { execl(exe_image, exe_image, NULL); test_assert("Not reached; execl() failed." && 0); } atomic_printf("child %d\n", child); waitpid(child, NULL, 0); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/target_process.run000066400000000000000000000004421265436462100174700ustar00rootroot00000000000000source `dirname $0`/util.sh save_exe breakpoint$bitness saved_breakpoint="breakpoint$bitness-$nonce" record $TESTNAME "$saved_breakpoint" TARGET_PID=$(grep 'child ' record.out | awk '{print $2}') echo Targeting recorded pid $TARGET_PID ... debug restart_breakpoint "-p $TARGET_PID -g 1" rr-4.1.0/src/test/tcgets.c000066400000000000000000000006761265436462100153640ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { struct termios tc = { 0 }; int ret; ret = ioctl(STDIN_FILENO, TCGETS, &tc); atomic_printf("TCGETS returned %d: { iflag=0x%x, oflag=0x%x, cflag=0x%x, " "lflag=0x%x }\n", ret, tc.c_iflag, tc.c_oflag, tc.c_cflag, tc.c_lflag); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/term_nonmain.c000066400000000000000000000015441265436462100165540ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void waittermsig(int sig, const char* waiter) { struct timespec ts = {.tv_sec = 1 }; sigset_t set; siginfo_t si; sigemptyset(&set); sigaddset(&set, sig); sigtimedwait(&set, &si, &ts); atomic_printf("FAILED: %s: signal %d either not caught or didn't terminate " "process within 1 second\n", waiter, sig); } static void* kill_thread(void* dontcare) { const int termsig = SIGTERM; atomic_puts("killing..."); kill(getpid(), termsig); waittermsig(termsig, "kill_thread"); return NULL; /* not reached */ } int main(int argc, char* argv[]) { pthread_t t; pthread_create(&t, NULL, kill_thread, NULL); pthread_join(t, NULL); atomic_puts("FAILED: joined thread that should have died"); return 0; } rr-4.1.0/src/test/term_nonmain.run000066400000000000000000000000671265436462100171350ustar00rootroot00000000000000source `dirname $0`/util.sh compare_test 'killing...'
rr-4.1.0/src/test/term_rr.c000066400000000000000000000003211265436462100155300ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { atomic_puts("EXIT-SUCCESS"); kill(getppid(), SIGTERM); return 0; } rr-4.1.0/src/test/term_rr.py000066400000000000000000000002731265436462100157440ustar00rootroot00000000000000from rrutil import * send_gdb('handle SIGKILL stop') send_gdb('break main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('SIGKILL') ok() rr-4.1.0/src/test/term_rr.run000066400000000000000000000000471265436462100161170ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/term_trace_cpu.run000066400000000000000000000030761265436462100174460ustar00rootroot00000000000000source `dirname $0`/util.sh # We want to test two things here: # 1) Terminating rr when the next tracee event is a reschedule # 2) Terminating rr when the syscallbuf contains events that need to be flushed # To test both of these, we need to set the reschedule threshold high enough # that our signal is delivered before the first reschedule after sending # the SYNC_TOKEN. We also need to set the reschedule threshold low enough # that a reschedule occurs before chew_cpu's spin() loop finishes. Currently # that loop has 2^30 iterations, so 100M events per reschedule sounds good. RECORD_ARGS="-c100000000" EXE=chew_cpu$bitness SYNC_TOKEN=spinning WAIT_SECS=1 record $EXE & echo "Waiting for token '$SYNC_TOKEN' from tracee ..." until grep -q $SYNC_TOKEN record.out; do sleep 0 done rrpid=$(parent_pid_of $(pidof $EXE-$nonce)) # It's possible for this signal to be acted on before chew_cpu actually # reaches the spin() loop, e.g. with syscallbuf disabled it might be delivered # before rr has finished handling the syscalls of atomic_puts. But scheduling # variations should ensure that sometimes we reach spin() first. echo " done. Delivering SIGTERM to $rrpid ..." kill -TERM $rrpid echo " done." # Wait for 'record' to actually terminate. Otherwise we might start # replaying before the trace file has been completely written, and we might # fail to see the tracee write EXIT-SUCCESS. wait if [[ "record.out" == $(grep -l "EXIT-SUCCESS" record.out) ]]; then failed "error during recording: tracer not interrupted in time." fi echo "Replaying ..." replay check "" rr-4.1.0/src/test/term_trace_syscall.run000066400000000000000000000013721265436462100203260ustar00rootroot00000000000000source `dirname $0`/util.sh EXE=nanosleep$bitness SYNC_TOKEN=sleeping record $EXE 10000 & # sleep "forever" echo "Waiting for token '$SYNC_TOKEN' from tracee ..." until grep -q $SYNC_TOKEN record.out; do sleep 0 done rrpid=$(parent_pid_of $(pidof $EXE-$nonce)) echo " done. Delivering SIGTERM to $rrpid ..." kill -TERM $rrpid echo " done." # Wait for 'record' to actually terminate. Otherwise we might start # replaying before the trace file has been completely written, and we might # fail to see the tracee write EXIT-SUCCESS. wait if [[ "record.out" == $(grep -l "EXIT-SUCCESS" record.out) ]]; then echo "Test '$TESTNAME' FAILED: error during recording: tracer not interrupted in time." fi echo "Replaying ..." 
replay check "" rr-4.1.0/src/test/test_lib.c000066400000000000000000000007071265436462100156730ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void* start_thread(void* dontcare) { return NULL; } static void constructor(void) __attribute__((constructor)); static void constructor(void) { struct timeval tv; pthread_t t; gettimeofday(&tv, NULL); pthread_create(&t, NULL, start_thread, NULL); pthread_join(t, NULL); } void lib_exit_success(void) { atomic_puts("EXIT-SUCCESS"); } rr-4.1.0/src/test/test_setup.gdb000066400000000000000000000000751265436462100165750ustar00rootroot00000000000000set pagination off handle SIGSEGV stop handle SIGKILL nostop rr-4.1.0/src/test/tgkill.c000066400000000000000000000011601265436462100153460ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int num_signals_caught; static int tgkill(int tgid, int tid, int sig) { return syscall(SYS_tgkill, tgid, tid, sig); } static void sighandler(int sig) { atomic_printf("Task %d got signal %d\n", sys_gettid(), sig); ++num_signals_caught; } int main(int argc, char* argv[]) { signal(SIGUSR1, sighandler); signal(SIGUSR2, sighandler); tgkill(getpid(), sys_gettid(), SIGUSR1); tgkill(getpid(), sys_gettid(), SIGUSR2); test_assert(2 == num_signals_caught); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/thread_stress.c000066400000000000000000000014131265436462100167330ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" /* Chosen so that |3MB * THREAD_GROUPS * THREADS_PER_GROUP| exhausts a * 32-bit address space. */ #define THREAD_GROUPS 150 #define THREADS_PER_GROUP 10 static void* thread(void* unused) { struct timeval tv; gettimeofday(&tv, NULL); return NULL; } int main(int argc, char* argv[]) { int i; for (i = 0; i < THREAD_GROUPS; ++i) { pthread_t threads[THREADS_PER_GROUP]; int j; for (j = 0; j < THREADS_PER_GROUP; ++j) { test_assert(0 == pthread_create(&threads[j], NULL, thread, NULL)); } for (j = 0; j < THREADS_PER_GROUP; ++j) { test_assert(0 == pthread_join(threads[j], NULL)); } } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/thread_yield.c000066400000000000000000000011111265436462100165110ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int spin(int iterations) { int i, dummy = 0; atomic_puts("spinning"); for (i = 1; i < iterations; ++i) { dummy += i % (1 << 20); dummy += i % (79 * (1 << 20)); } return dummy; } static int ran_thread = 0; static void* do_thread(void* p) { ran_thread = 1; return NULL; } int main(int argc, char* argv[]) { pthread_t t; pthread_create(&t, NULL, do_thread, NULL); spin(1 << 28); test_assert(ran_thread); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/threaded_syscall_spam.c000066400000000000000000000027231265436462100204200ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static int num_its; static void syscall_spam(void) { int i; struct timespec ts; struct timeval tv; for (i = 0; i < 1 << num_its; ++i) { /* The odds of the signal being caught in the library * implementing these syscalls is very high. But even * if it's not caught there, this test will pass. 
*/ clock_gettime(CLOCK_MONOTONIC, &ts); gettimeofday(&tv, NULL); clock_gettime(CLOCK_MONOTONIC, &ts); gettimeofday(&tv, NULL); clock_gettime(CLOCK_MONOTONIC, &ts); gettimeofday(&tv, NULL); clock_gettime(CLOCK_MONOTONIC, &ts); gettimeofday(&tv, NULL); } } static void unblock_signals(void) { sigset_t set; sigfillset(&set); test_assert(0 == pthread_sigmask(SIG_UNBLOCK, &set, NULL)); atomic_printf(" %d: unblocked all sigs\n", sys_gettid()); } static void* thread(void* unused) { unblock_signals(); syscall_spam(); return NULL; } int main(int argc, char** argv) { sigset_t set; pthread_t t; test_assert(argc == 2); num_its = atoi(argv[1]); test_assert(num_its > 0); atomic_printf("Running 2^%d iterations in two threads\n", num_its); atomic_printf("parent %d: blocking all sigs ...\n", getpid()); sigfillset(&set); test_assert(0 == pthread_sigmask(SIG_BLOCK, &set, NULL)); pthread_create(&t, NULL, thread, NULL); unblock_signals(); syscall_spam(); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/threaded_syscall_spam.run000066400000000000000000000006641265436462100210040ustar00rootroot00000000000000source `dirname $0`/util.sh # Without the syscallbuf, trying to record the large number of # syscalls in this test is impractical. skip_if_no_syscall_buf # 2^17 iterations is arbitrarily chosen to take ~3s on a fast machine record $TESTNAME 17 # Because of issue #184, replay takes longer than practical. So for # now we'll skip it and hope other tests exercise the relevant code # well enough. #replay #check 'EXIT-SUCCESS' passed rr-4.1.0/src/test/threads.c000066400000000000000000000025511265436462100155170ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" long int counter = 0; pthread_barrier_t bar; void catcher(int sig) { atomic_printf("Signal caught, Counter is %ld\n", counter); atomic_puts("EXIT-SUCCESS"); _exit(0); } void* receiver(void* name) { struct sigaction sact; sigemptyset(&sact.sa_mask); sact.sa_flags = 0; sact.sa_handler = catcher; sigaction(SIGALRM, &sact, NULL); pthread_barrier_wait(&bar); while (1) { counter++; if (counter % 100000 == 0) { write(1, ".", 1); } } return NULL; } void* sender(void* id) { sleep(1); pthread_barrier_wait(&bar); pthread_kill(*((pthread_t*)id), SIGALRM); return NULL; } int main(void) { struct timeval tv; pthread_t thread1, thread2; /* (Kick on the syscallbuf lib.) */ gettimeofday(&tv, NULL); /* init barrier */ pthread_barrier_init(&bar, NULL, 2); /* Create independent threads each of which will execute * function */ pthread_create(&thread1, NULL, receiver, NULL); pthread_create(&thread2, NULL, sender, &thread1); /* Wait till threads are complete before main * continues. Unless we wait we run the risk of executing an * exit which will terminate the process and all threads * before the threads have completed. */ pthread_join(thread1, NULL); pthread_join(thread2, NULL); return 0; } rr-4.1.0/src/test/threads.run000066400000000000000000000005651265436462100161040ustar00rootroot00000000000000source `dirname $0`/util.sh # When the syscallbuf is enabled, this test looks to rr like a long # series of CPU chewing, with buffer flushes at each time-slice # interrupt. This makes replay take a pathologically long time # because of all the async-signal replaying. So speed things up by # bumping up the timeslice. 
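# rr's -c flag sets how many ticks (retired conditional branches) a tracee
# may run between preemptions; a larger slice means far fewer time-slice
# interrupts get recorded, and each avoided interrupt is one less async
# signal that replay has to single-step its way to.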
RECORD_ARGS="-c20000000" compare_test EXIT-SUCCESS rr-4.1.0/src/test/timer.c000066400000000000000000000037471265436462100152150ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static volatile int caught_sig = 0; void catcher(int signum, siginfo_t* siginfo_ptr, void* ucontext_ptr) { caught_sig = signum; } int main(int argc, char* argv[]) { timer_t* id; struct itimerspec its = { { 100000, 0 }, { 0, 100000000 } }; struct itimerspec its2 = { { 100000, 0 }, { 100000, 0 } }; struct itimerspec* old; struct itimerspec* old2; struct sigaction sact; int counter; sigemptyset(&sact.sa_mask); sact.sa_flags = SA_SIGINFO; sact.sa_sigaction = catcher; sigaction(SIGALRM, &sact, NULL); ALLOCATE_GUARD(id, 'a'); test_assert(0 == timer_create(CLOCK_REALTIME, NULL, id)); VERIFY_GUARD(id); test_assert(0 == timer_settime(*id, 0, &its, NULL)); for (counter = 0; counter >= 0 && !caught_sig; counter++) { if (counter % 100000 == 0) { write(STDOUT_FILENO, ".", 1); } } atomic_printf("\nSignal %d caught, Counter is %d\n", caught_sig, counter); test_assert(SIGALRM == caught_sig); test_assert(0 == timer_getoverrun(*id)); ALLOCATE_GUARD(old, 'b'); test_assert(0 == timer_settime(*id, 0, &its2, old)); VERIFY_GUARD(old); test_assert(old->it_interval.tv_sec == its.it_interval.tv_sec); test_assert(old->it_interval.tv_nsec == its.it_interval.tv_nsec); test_assert(old->it_value.tv_sec <= its.it_interval.tv_sec); test_assert(old->it_value.tv_sec >= its.it_interval.tv_sec / 2); test_assert(old->it_value.tv_nsec < 1000000000); ALLOCATE_GUARD(old2, 'c'); test_assert(0 == timer_gettime(*id, old2)); VERIFY_GUARD(old2); test_assert(old2->it_interval.tv_sec == its2.it_interval.tv_sec); test_assert(old2->it_interval.tv_nsec == its2.it_interval.tv_nsec); test_assert(old2->it_value.tv_sec <= its2.it_interval.tv_sec); test_assert(old2->it_value.tv_sec >= its2.it_interval.tv_sec / 2); test_assert(old2->it_value.tv_nsec < 1000000000); test_assert(0 == timer_delete(*id)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/timerfd.c000066400000000000000000000024131265436462100155140ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int fd = timerfd_create(CLOCK_MONOTONIC, 0); struct itimerspec spec, old; uint64_t num_expirations; atomic_printf("created timerfd %d\n", fd); test_assert(fd >= 0); memset(&spec, 0, sizeof(spec)); spec.it_value.tv_nsec = 100000000; atomic_printf("setting timer to expire in {sec:%ld,nsec:%ld}\n", spec.it_value.tv_sec, spec.it_value.tv_nsec); timerfd_settime(fd, 0, &spec, &old); atomic_printf(" (old expiration was {sec:%ld,nsec:%ld})\n", old.it_value.tv_sec, old.it_value.tv_nsec); test_assert(0 == old.it_value.tv_sec && 0 == old.it_value.tv_nsec); atomic_puts("sleeping 50ms ..."); usleep(50000); timerfd_gettime(fd, &old); atomic_printf(" expiration now in {sec:%ld,nsec:%ld})\n", old.it_value.tv_sec, old.it_value.tv_nsec); test_assert(0 == old.it_value.tv_sec && old.it_value.tv_nsec <= 50000000); atomic_puts("waiting for timer to expire ..."); read(fd, &num_expirations, sizeof(num_expirations)); atomic_printf(" timer expired %" PRIu64 " times\n", num_expirations); test_assert(1 == num_expirations); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/times.c000066400000000000000000000007651265436462100152130ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ 
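/* Exercises times(2): the call must not fail with (clock_t)-1, the
 * child-time accumulator of a process that never spawned children must be
 * zero, and the kernel must not write past the end of the struct (checked
 * via the guard allocation below). */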
#include "rrutil.h" int main(int argc, char* argv[]) { struct tms* buf; clock_t t; ALLOCATE_GUARD(buf, -1); test_assert((t = times(buf)) != (clock_t)-1); test_assert(buf->tms_cutime == 0); test_assert(buf->tms_utime >= 0); VERIFY_GUARD(buf); atomic_printf("tms_utime = %lld\n", (long long)buf->tms_utime); atomic_printf("result = %lld\n", (long long)t); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/tiocgpgrp.c000066400000000000000000000005211265436462100160560ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int ret; pid_t pgrp = 0; ret = ioctl(STDIN_FILENO, TIOCGPGRP, &pgrp); atomic_printf("TIOCGPGRP returned process group %d (ret:%d)\n", pgrp, ret); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/tiocgwinsz.c000066400000000000000000000006161265436462100162650ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int ret; struct winsize w; memset(&w, 0x5a, sizeof(w)); ret = ioctl(STDIN_FILENO, TIOCGWINSZ, &w); atomic_printf("TIOCGWINSZ returned {row:%d col:%d} (ret:%d)\n", w.ws_row, w.ws_col, ret); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/tiocinq.c000066400000000000000000000005061265436462100155310ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int navail; int ret; ret = ioctl(STDIN_FILENO, TIOCINQ, &navail); atomic_printf("TIOCINQ returned navail=%d (ret:%d)\n", navail, ret); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/tiocinq.run000066400000000000000000000001311265436462100161050ustar00rootroot00000000000000source `dirname $0`/util.sh echo "hello" | (record $TESTNAME) replay check EXIT-SUCCESS rr-4.1.0/src/test/trace_version.run000066400000000000000000000015671265436462100173200ustar00rootroot00000000000000source `dirname $0`/util.sh TRACE_DIR=trace_0 function expect_replay_fail { replay if [[ $(cat replay.err) == "" ]]; then echo "Test '$TESTNAME' FAILED: replay should have failed, but it succeeded." exit 1 fi echo " (replay failed as expected)" } record simple$bitness trace_dir="simple$bitness-$nonce-0" if [ ! -f "$trace_dir/version" ]; then echo "Test '$TESTNAME' FAILED: version file not found in trace directory." exit 1 fi; echo "Moving version file away ..." mv "$trace_dir/version" ./version.tmp expect_replay_fail echo "Trying to replay with empty version file ..." echo "" > "$trace_dir/version" expect_replay_fail echo "Trying to replay with dummy version number ..." echo "-42\n" > "$trace_dir/version" expect_replay_fail echo "Restoring trace version file ..." 
mv ./version.tmp "$trace_dir/version" replay check EXIT-SUCCESS rr-4.1.0/src/test/truncate.c000066400000000000000000000017331265436462100157130ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #define _FILE_OFFSET_BITS 64 #include "rrutil.h" #define TEST_FILE "foo.txt" ssize_t get_file_size(const char* filename) { struct stat* st; ssize_t result; ALLOCATE_GUARD(st, 'x'); test_assert(0 == stat(filename, st)); result = st->st_size; FREE_GUARD(st); return result; } int main(int argc, char* argv[]) { int fd; ssize_t size; fd = open(TEST_FILE, O_CREAT | O_EXCL | O_RDWR, 0600); test_assert(0 <= fd); size = get_file_size(TEST_FILE); atomic_printf("initial file size: %zd\n", size); test_assert(0 == size); truncate(TEST_FILE, 4096); size = get_file_size(TEST_FILE); atomic_printf("after truncate(4096): %zd\n", size); test_assert(4096 == size); ftruncate(fd, 8192); size = get_file_size(TEST_FILE); atomic_printf("after truncate(8192): %zd\n", size); test_assert(8192 == size); unlink(TEST_FILE); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/tty_ioctls.c000066400000000000000000000004531265436462100162610ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int fd = open("/dev/ptmx", O_RDONLY); test_assert(fd >= 0); atomic_printf("tty ptsname = %s\n", ptsname(fd)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/uname.c000066400000000000000000000013351265436462100151710ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { struct utsname* buf; ALLOCATE_GUARD(buf, 0); test_assert(0 == uname(buf)); test_assert(buf->sysname[0] != 0); test_assert(buf->nodename[0] != 0); test_assert(buf->release[0] != 0); test_assert(buf->version[0] != 0); test_assert(buf->machine[0] != 0); VERIFY_GUARD(buf); atomic_printf("{ sysname: '%s', nodename: '%s', release: '%s',\n" " version: '%s', machine: '%s', domainname: '%s' }\n", buf->sysname, buf->nodename, buf->release, buf->version, buf->machine, buf->domainname); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/unexpected_stack_growth.c000066400000000000000000000013101265436462100210000ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static volatile int vv = 0; static void breakpoint(void) {} static void funcall(void) { char buf[2000000]; int i; for (i = 0; i < sizeof(buf); ++i) { buf[i] = (char)i; } for (i = 0; i < sizeof(buf); ++i) { vv += buf[i % 777777]; } } int main(int argc, char* argv[]) { char v; char* fix_addr; void* p; breakpoint(); fix_addr = (char*)(((uintptr_t)&v - 256 * 1024) & ~(uintptr_t)(PAGE_SIZE - 1)); p = mmap(fix_addr, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); test_assert(p == fix_addr); funcall(); return 0; } rr-4.1.0/src/test/unexpected_stack_growth.py000066400000000000000000000004741265436462100212200ustar00rootroot00000000000000from rrutil import * send_gdb('break breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('finish') send_gdb('watch -l *(&v - 1000000)') expect_gdb('Hardware[()/a-z ]+watchpoint 2') send_gdb('c') expect_gdb('signal SIGSEGV') send_gdb('c') expect_gdb('exited normally') ok() 
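The .c/.py pair above leans on a detail of stack auto-growth: the kernel
will not grow the main thread's stack through an existing mapping, so
planting a MAP_FIXED page in the growth path makes a sufficiently large
stack frame fault with SIGSEGV, which the expect script waits for. Below
is a minimal standalone sketch of the same trick; the 64KiB/128KiB
offsets and the hard-coded 4096-byte page size are illustrative
assumptions, not values taken from the test.

/* Sketch: make stack growth collide with a planted PROT_NONE page. */
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static char altstack[64 * 1024];

static void on_segv(int sig) {
  static const char msg[] = "faulted as expected\n";
  (void)sig;
  write(STDOUT_FILENO, msg, sizeof(msg) - 1);
  _exit(0);
}

static void big_frame(void) {
  volatile char frame[128 * 1024]; /* large enough to reach the page */
  frame[0] = 1;                    /* expected to fault */
}

int main(void) {
  char anchor;
  char* page;
  stack_t ss = { altstack, 0, sizeof(altstack) };
  struct sigaction sa;

  /* The handler must run on an alternate stack, since the main stack
   * cannot grow to hold a signal frame. */
  sigaltstack(&ss, NULL);
  memset(&sa, 0, sizeof(sa));
  sa.sa_handler = on_segv;
  sa.sa_flags = SA_ONSTACK;
  sigaction(SIGSEGV, &sa, NULL);

  /* Plant a no-access page a little below the current frame. */
  page = (char*)(((uintptr_t)&anchor - 64 * 1024) & ~(uintptr_t)4095);
  if (MAP_FAILED == mmap(page, 4096, PROT_NONE,
                         MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)) {
    perror("mmap");
    return 1;
  }
  big_frame();
  return 1; /* not reached if the fault fired */
}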
rr-4.1.0/src/test/unexpected_stack_growth.run000066400000000000000000000000471265436462100213700ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/unjoined_thread.c000066400000000000000000000005131265436462100172230ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void* thread(void* unused) { sleep(-1); return NULL; } int main(int argc, char* argv[]) { pthread_t t; pthread_create(&t, NULL, thread, NULL); /* Don't join |t|. */ atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/unshare.c000066400000000000000000000050301265436462100155250ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" extern int capset(cap_user_header_t header, const cap_user_data_t data); static char tmp_name[] = "/tmp/rr-unshare-tmp-XXXXXX"; static void* start_thread(void* p) { test_assert(0 == unshare(CLONE_FILES)); test_assert(0 == close(STDOUT_FILENO)); return NULL; } static void run_child(void) { pid_t child = fork(); int status; if (!child) { struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_1, 0 }; struct __user_cap_data_struct data = { 0x1, 0x1, 0x1 }; /* Test creating a nested child */ pid_t nested_child = fork(); if (!nested_child) { exit(77); } test_assert(nested_child == wait(&status)); test_assert(WIFEXITED(status) && 77 == WEXITSTATUS(status)); /* Test creating a thread */ pthread_t thread; pthread_create(&thread, NULL, start_thread, NULL); pthread_join(thread, NULL); /* Test using capset. capset is privileged, but we are privileged in our user namespace. */ test_assert(0 == capset(&hdr, &data)); /* stdout should still be writable due to the unshare() */ test_assert(13 == write(STDOUT_FILENO, "EXIT-SUCCESS\n", 13)); exit(55); } test_assert(child == wait(&status)); test_assert(WIFEXITED(status) && 55 == WEXITSTATUS(status)); } static int run_test(void) { int ret; int fd; struct rlimit nofile; /* Emulate what sandboxes trying to close all open file descriptors */ test_assert(0 == getrlimit(RLIMIT_NOFILE, &nofile)); for (fd = STDOUT_FILENO + 1; fd < nofile.rlim_cur; ++fd) { ret = close(fd); test_assert(ret == 0 || (ret == -1 && errno == EBADF)); } ret = unshare(CLONE_NEWUSER); if (ret == -1 && (errno == EINVAL || errno == EPERM)) { atomic_puts("EXIT-SUCCESS"); return 77; } test_assert(0 == ret); test_assert(0 == unshare(CLONE_NEWNS)); test_assert(0 == unshare(CLONE_NEWIPC)); test_assert(0 == unshare(CLONE_NEWNET)); ret = unshare(CLONE_NEWPID); if (ret == -1 && errno == EINVAL) { atomic_puts("EXIT-SUCCESS"); return 77; } test_assert(0 == ret); test_assert(0 == chroot(tmp_name)); run_child(); return 77; } int main(int argc, char* argv[]) { pid_t child; int ret; int status; test_assert(tmp_name == mkdtemp(tmp_name)); child = fork(); if (!child) { return run_test(); } ret = wait(&status); test_assert(0 == rmdir(tmp_name)); test_assert(child == ret); test_assert(WIFEXITED(status) && 77 == WEXITSTATUS(status)); return 0; } rr-4.1.0/src/test/user_ignore_sig.c000066400000000000000000000010631265436462100172450ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void handle_usr1(int sig) { test_assert("Shouldn't have caught SIGUSR1" && 0); } int main(int argc, char* argv[]) { /* NB: unlike most other rr tests, this one verifies that rr * can "intervene" in execution to block signals, for the * purposes of unit tests. 
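* (The matching user_ignore_sig.run below records with -i10; signal 10
* is SIGUSR1 on x86, which is how rr is told to block delivery.)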
This test *will* fail if not run * under rr with the right command-line options. */ signal(SIGUSR1, handle_usr1); raise(SIGUSR1); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/user_ignore_sig.run000066400000000000000000000001741265436462100176310ustar00rootroot00000000000000source `dirname $0`/util.sh # Ignore SIGUSR1; block its delivery to tracees. RECORD_ARGS="-i10" compare_test EXIT-SUCCESS rr-4.1.0/src/test/util.sh000066400000000000000000000262421265436462100152350ustar00rootroot00000000000000# # This file is included by foo.run test-driver files. It provides # some helpers for common test operations. A driver file foo.run # will want to include this file as follows # # source `dirname $0`/util.sh # # It is essential that util.sh inherit its $n parameters from the # driver script's parameters. # # Most tests are either "compare_test"s, which check that record and # replay successfully complete and the output is the same, or, # "debug_test"s, which launch a debugger script. So the remainder of # your test runner probably looks like # # compare_test # or, |debug_test| # # Test runners may set the environment variable $RECORD_ARGS to pass # arguments to rr for recording. This is only useful for tweaking the # scheduler, don't use it for anything else. # # delay_kill # # Deliver the signal |sig|, after waiting |delay_secs| seconds, to the # process named |proc|. If there's more than |proc|, the signal is # not delivered. function delay_kill { sig=$1; delay_secs=$2; proc=$3 sleep $delay_secs pid="" for i in `seq 1 5`; do live=`ps ax -o 'pid= cmd=' | awk '{print $1 " " $2}' | grep $proc` num=`echo "$live" | wc -l` if [[ "$num" -eq 1 ]]; then pid=`echo "$live" | awk '{print $1}'` break fi sleep 0.1 done if [[ "$num" -gt 1 ]]; then leave_data=y echo FAILED: "$num" of "'$proc'" >&2 exit 1 elif [[ -z "$pid" ]]; then leave_data=y echo FAILED: process "'$proc'" not located >&2 exit 1 fi kill -s $sig $pid if [[ $? != 0 ]]; then # Sometimes we fail to deliver a signal to a process because # it finished first (due to scheduling races). That's a benign # failure. echo signal $sig not delivered to "'$proc'", letting test succeed anyway else echo Successfully delivered signal $sig to "'$proc'" fi } function fatal { #... echo "$@" >&2 exit 1 } function onexit { cd if [[ "$leave_data" != "y" ]]; then rm -rf $workdir else echo Test $TESTNAME failed, leaving behind $workdir echo To replay the failed test, run echo " " _RR_TRACE_DIR="$workdir" rr replay fi } function parent_pid_of { pid=$1 ps -p $pid -o ppid= } function usage { echo Usage: "util.sh TESTNAME [LIB_ARG] [OBJDIR]" } DEFAULT_FLAGS="--suppress-environment-warnings --check-cached-mmaps --fatal-errors" # Don't bind record/replay tracees to the same logical CPU. When we # do that, the tests take impractically long to run. # # TODO: find a way to run faster with CPU binding GLOBAL_OPTIONS="$DEFAULT_FLAGS" # ... but tests that DO want CPU binding can override the default by # setting # # GLOBAL_OPTIONS="$GLOBAL_OPTIONS_BIND_CPU" # # just after sourcing this file. GLOBAL_OPTIONS_BIND_CPU="$DEFAULT_FLAGS" LIB_ARG=$1 SRCDIR=$2 if [[ ! -d "$SRCDIR" ]]; then fatal "FAILED: srcdir missing" fi OBJDIR=$3 if [[ "$OBJDIR" == "" ]]; then # Default to assuming that the user's working directory is the # test/ directory within the rr clone. 
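# (Note that the positional parameters this script actually consumes
# are LIB_ARG, SRCDIR, OBJDIR, TESTNAME, in that order; the usage()
# string above looks out of date.)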
OBJDIR=`realpath ../../../obj` fi TESTNAME=$4 if [[ "$TESTNAME" == "" ]]; then [[ $0 =~ ([A-Za-z0-9_]+)\.run$ ]] || fatal "FAILED: bad test script name" TESTNAME=${BASH_REMATCH[1]} fi if [[ $TESTNAME =~ ([A-Za-z0-9_]+)_32$ ]]; then bitness=_32 TESTNAME_NO_BITNESS=${BASH_REMATCH[1]} else TESTNAME_NO_BITNESS=$TESTNAME fi # The temporary directory we create for this test run. workdir= # Did the test pass? If not, then we'll leave the recording and # output around for developers to debug. leave_data=n # The unique ID allocated to this test directory. nonce= # Set up the environment and working directory. TESTDIR="${SRCDIR}/src/test" export PATH="${OBJDIR}/bin:${PATH}" export LD_LIBRARY_PATH="${OBJDIR}/lib:/usr/local/lib:${LD_LIBRARY_PATH}" which rr >/dev/null 2>&1 if [[ "$?" != "0" ]]; then fatal FAILED: rr not found in PATH "($PATH)" fi if [[ ! -d $SRCDIR ]]; then fatal FAILED: SRCDIR "($SRCDIR)" not found. objdir and srcdir must share the same parent. fi if [[ ! -d $TESTDIR ]]; then fatal FAILED: TESTDIR "($TESTDIR)" not found. fi # Our test programs intentionally crash a lot. Don't generate coredumps for them. ulimit -c 0 # NB: must set up the trap handler *before* mktemp trap onexit EXIT workdir=`mktemp -dt rr-test-$TESTNAME-XXXXXXXXX` cd $workdir # XXX technically the trailing -XXXXXXXXXX isn't unique, since there # could be "foo-123456789" and "bar-123456789", but if that happens, # buy me a lottery ticket. baseworkdir=$(basename ${workdir}) nonce=${baseworkdir#rr-test-$TESTNAME-} ##-------------------------------------------------- ## Now we come to the helpers available to test runners. This is the ## testing "API". ## function fails { why=$1; echo NOTE: Skipping "'$TESTNAME'" because it fails: $why exit 0 } # If the test takes too long to run without the syscallbuf enabled, # use this to prevent it from running when that's the case. function skip_if_no_syscall_buf { if [[ "-n" == "$LIB_ARG" ]]; then echo NOTE: Skipping "'$TESTNAME'" because syscallbuf is disabled exit 0 fi } # If the test is causing an unrealistic failure when the syscallbuf is # enabled, skip it. This better be a temporary situation! function skip_if_syscall_buf { if [[ "-b" == "$LIB_ARG" || "" == "$LIB_ARG" ]]; then echo NOTE: Skipping "'$TESTNAME'" because syscallbuf is enabled exit 0 fi } function just_record { exe=$1; exeargs=$2; _RR_TRACE_DIR="$workdir" \ rr $GLOBAL_OPTIONS record $LIB_ARG $RECORD_ARGS $exe $exeargs 1> record.out 2> record.err } function save_exe { exe=$1; cp ${OBJDIR}/bin/$exe $exe-$nonce } # Record $exe with $exeargs. function record { exe=$1; save_exe $exe just_record ./$exe-$nonce "$2 $3 $4 $5" } # record_async_signal # # Record $test, delivering $signal to it after $delay-secs. # If for some reason delay_kill doesn't run in time, the signal # will not be delivered but the test will not be aborted. function record_async_signal { sig=$1; delay_secs=$2; exe=$3; exeargs=$4; delay_kill $sig $delay_secs $exe-$nonce & record $exe $exeargs wait } function replay { replayflags=$1 _RR_TRACE_DIR="$workdir" \ rr $GLOBAL_OPTIONS replay -a $replayflags 1> replay.out 2> replay.err } # debug [replay-args] # # Load the "expect" script to drive replay of the recording of |exe|. function debug { expectscript=$1; replayargs=$2 _RR_TRACE_DIR="$workdir" \ python2 $TESTDIR/$expectscript.py \ rr $GLOBAL_OPTIONS replay -x $TESTDIR/test_setup.gdb $replayargs if [[ $? 
== 0 ]]; then passed else failed "debug script failed" fi } function failed { msg=$1; leave_data=y echo "Test '$TESTNAME' FAILED: $msg" } function passed { echo "Test '$TESTNAME' PASSED" } # Check that (i) no error during replay; (ii) recorded and replayed # output match; (iii) the supplied token was found in the output. # Otherwise the test fails. function check { token=$1; if [ ! -f record.out -o ! -f replay.err -o ! -f replay.out -o ! -f record.err ]; then failed "output files not found." elif [[ $(cat record.err) != "" ]]; then failed ": error during recording:" echo "--------------------------------------------------" cat record.err echo "--------------------------------------------------" echo "record.out:" echo "--------------------------------------------------" cat record.out echo "--------------------------------------------------" elif [[ $(cat replay.err) != "" ]]; then failed ": error during replay:" echo "--------------------------------------------------" cat replay.err echo "--------------------------------------------------" echo "replay.out:" echo "--------------------------------------------------" cat replay.out echo "--------------------------------------------------" elif [[ $(diff record.out replay.out) != "" ]]; then failed ": output from recording different than replay" echo "diff -U8 $workdir/record.out $workdir/replay.out" diff -U8 record.out replay.out elif [[ "$token" != "" && "record.out" != $(grep -l "$token" record.out) ]]; then failed ": token '$token' not in record.out:" echo "--------------------------------------------------" cat record.out echo "--------------------------------------------------" else passed fi } # compare_test [] [executable] # # Record the test name passed to |util.sh|, then replay it (optionally # with $replayflags) and verify record/replay output match and $token # appears in the output. Uses $executable instead of the passed-in testname # if present. function compare_test { token=$1; replayflags=$2; test=$TESTNAME if (( $# >= 3 )); then test=$3 fi if [[ $token == "" ]]; then failed ": didn't pass an exit token" fi record $test replay $replayflags check $token } # debug_test # # Record the test name passed to |util.sh|, then replay the recording # using the "expect" script $test-name.py, which is responsible for # computing test pass/fail. function debug_test { record $TESTNAME debug $TESTNAME_NO_BITNESS } # Return the number of events in the most recent local recording. function count_events { local events=$(rr $GLOBAL_OPTIONS dump -r latest-trace | wc -l) # The |simple| test is just about the simplest possible C program, # and has around 180 events (when recorded on a particular # developer's machine). If we count a number of events # significalty less than that, almost certainly something has gone # wrong. if [ "$events" -le 150 ]; then failed ": Recording had too few events. Is |rr dump -r| broken?" fi # This event count is used to loop over attaching the debugger. # The tests assume that the debugger can be attached at all # events, but at the very last events, EXIT and so forth, rr can't # attach the debugger. So we fudge the event count down to avoid # that edge case. let "events -= 10" echo $events } # Return a random number from the range [min, max], inclusive. function rand_range { min=$1; max=$2 local num=$RANDOM local range="" let "range = 1 + $max - $min" let "num %= $range" let "num += $min" echo $num } # Record |exe|, then replay it using the |restart_finish| debugger # script attaching at every recorded event. 
To make the # debugger-replays more practical, the events are strided between at a # random interval between [min, max], inclusive. # # So for example, |checkpoint_test simple 3 5| means to record the # "simple" test, and attach the debugger at every X'th event, where X # is a random number in [3, 5]. function checkpoint_test { exe=$1; min=$2; max=$3; record $exe num_events=$(count_events) stride=$(rand_range $min $max) for i in $(seq 1 $stride $num_events); do echo Checkpointing at event $i ... debug restart_finish "-g $i" if [[ "$leave_data" == "y" ]]; then break fi done } rr-4.1.0/src/test/utimes.c000066400000000000000000000015241265436462100153720ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static const int MOD_TIME = 888888; static const int ACCESS_TIME = 999999; int main(int argc, char* argv[]) { char path[] = "rr-test-file-XXXXXX"; int fd = mkstemp(path); struct utimbuf utim = { ACCESS_TIME, MOD_TIME }; struct timeval tv[2] = { { ACCESS_TIME + 1, 0 }, { MOD_TIME + 1, 0 } }; struct stat st; test_assert(0 <= fd); test_assert(0 == utime(path, &utim)); test_assert(0 == fstat(fd, &st)); test_assert(st.st_atime == ACCESS_TIME); test_assert(st.st_mtime == MOD_TIME); test_assert(0 == utimes(path, tv)); test_assert(0 == fstat(fd, &st)); test_assert(st.st_atime == ACCESS_TIME + 1); test_assert(st.st_mtime == MOD_TIME + 1); test_assert(0 == unlink(path)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/vfork.c000066400000000000000000000007611265436462100152150ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { const char* exe; pid_t child; int status; test_assert(2 == argc); exe = argv[1]; if (0 == (child = vfork())) { execl(exe, exe, NULL); test_assert("Not reached" && 0); } test_assert(child == waitpid(child, &status, 0)); test_assert(WIFEXITED(status) && 0 == WEXITSTATUS(status)); atomic_puts("vforker-EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/vfork.run000066400000000000000000000001761265436462100155770ustar00rootroot00000000000000source `dirname $0`/util.sh save_exe simple$bitness record $TESTNAME simple$bitness-$nonce replay check vforker-EXIT-SUCCESS rr-4.1.0/src/test/vfork_flush.c000066400000000000000000000012171265436462100164130ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void unblock_signals(void) { sigset_t mask; sigemptyset(&mask); sigprocmask(SIG_SETMASK, &mask, NULL); } int main(int argc, char* argv[]) { pid_t child; int status; if (0 == (child = vfork())) { /* Unblocking SIGSYS should be OK */ unblock_signals(); test_assert(0 == close(0)); _exit(77); } test_assert(child == waitpid(child, &status, 0)); test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 77); /* Unblocking SIGSYS should be OK */ unblock_signals(); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/video_capture.c000066400000000000000000000075111265436462100167170ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static const char device_name[] = "/dev/video0"; struct buffer { struct v4l2_buffer vbuf; unsigned char* mmap_data; }; static struct buffer buffers[4]; static size_t buffer_count; static void no_v4l2(void) { atomic_puts("EXIT-SUCCESS"); exit(0); } static int open_device(void) { struct v4l2_capability cap; int fd = 
open("/dev/video0", O_RDWR); int ret; if (fd < 0 && errno == ENOENT) { atomic_printf("%s not found; aborting test\n", device_name); no_v4l2(); } if (fd < 0 && errno == EACCES) { atomic_printf("%s not accessible; aborting test\n", device_name); no_v4l2(); } test_assert(fd >= 0); ret = ioctl(fd, VIDIOC_QUERYCAP, &cap); if (ret < 0 && errno == EINVAL) { atomic_printf("%s is not a V4L2 device; aborting test\n", device_name); no_v4l2(); } if (ret < 0 && errno == EACCES) { atomic_printf("%s is not accessible; aborting test\n", device_name); no_v4l2(); } if (!(cap.capabilities & V4L2_CAP_VIDEO_CAPTURE)) { atomic_printf("%s is not a V4L2 capture device; aborting test\n", device_name); no_v4l2(); } if (!(cap.capabilities & V4L2_CAP_STREAMING)) { atomic_printf("%s does not support streaming; aborting test\n", device_name); no_v4l2(); } return fd; } static void init_device(int fd) { struct v4l2_format fmt; struct v4l2_requestbuffers req; int ret; size_t i; enum v4l2_buf_type type; fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; test_assert(0 == ioctl(fd, VIDIOC_G_FMT, &fmt)); atomic_printf("%s returning %dx%d frames\n", device_name, fmt.fmt.pix.width, fmt.fmt.pix.height); req.count = 4; req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; req.memory = V4L2_MEMORY_MMAP; ret = ioctl(fd, VIDIOC_REQBUFS, &req); if (ret < 0 && errno == EINVAL) { atomic_printf("%s does not support memory mapping; aborting test\n", device_name); no_v4l2(); } if (ret < 0 && errno == EBUSY) { atomic_printf("%s is busy; aborting test\n", device_name); no_v4l2(); } test_assert(0 == ret); if (req.count < 2) { atomic_printf("%s only supports one buffer; aborting test\n", device_name); no_v4l2(); } buffer_count = req.count; for (i = 0; i < buffer_count; ++i) { struct buffer* buf = buffers + i; buf->vbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; buf->vbuf.memory = V4L2_MEMORY_MMAP; buf->vbuf.index = i; test_assert(0 == ioctl(fd, VIDIOC_QUERYBUF, &buf->vbuf)); buf->mmap_data = mmap(NULL, buf->vbuf.length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, buf->vbuf.m.offset); test_assert(buf->mmap_data != MAP_FAILED); test_assert(0 == ioctl(fd, VIDIOC_QBUF, &buf->vbuf)); } type = V4L2_BUF_TYPE_VIDEO_CAPTURE; test_assert(0 == ioctl(fd, VIDIOC_STREAMON, &type)); } static void read_frames(int fd) { size_t i, j; for (i = 0; i < buffer_count * 2; ++i) { struct v4l2_buffer buf; int ret; size_t bytes; buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; buf.memory = V4L2_MEMORY_MMAP; ret = ioctl(fd, VIDIOC_DQBUF, &buf); test_assert(ret == 0); test_assert(buf.index < buffer_count); bytes = buf.length < 16 ? 
buf.length : 16; atomic_printf("Frame %d: buffer %d: ", (int)i, (int)buf.index); for (j = 0; j < bytes; ++j) { atomic_printf("%2x ", buffers[buf.index].mmap_data[j]); } atomic_printf("...\n"); test_assert(0 == ioctl(fd, VIDIOC_QBUF, &buf)); } } static void close_device(int fd) { enum v4l2_buf_type type; type = V4L2_BUF_TYPE_VIDEO_CAPTURE; test_assert(0 == ioctl(fd, VIDIOC_STREAMOFF, &type)); } int main(int argc, char* argv[]) { int fd = open_device(); init_device(fd); read_frames(fd); close_device(fd); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/wait.c000066400000000000000000000020731265436462100150300ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { int i = 0; pid_t pid; int status; siginfo_t si; ++i; pid = fork(); if (!pid) { usleep(100); exit(i); } test_assert(pid == wait(&status)); atomic_printf("%d exited with status %#x\n", pid, status); test_assert(WIFEXITED(status) && i == WEXITSTATUS(status)); ++i; pid = fork(); if (!pid) { usleep(100); exit(i); } test_assert(pid == waitpid(pid, &status, 0)); atomic_printf("%d exited with status %#x\n", pid, status); test_assert(WIFEXITED(status) && i == WEXITSTATUS(status)); ++i; pid = fork(); if (!pid) { usleep(100); exit(i); } test_assert(0 == waitid(P_PID, pid, &si, WEXITED | WSTOPPED)); atomic_printf("%d exited with exit-type %d; code %d\n", si.si_pid, si.si_code, si.si_status); test_assert(SIGCHLD == si.si_signo && CLD_EXITED == si.si_code); test_assert(pid == si.si_pid && i == si.si_status); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/watchpoint.c000066400000000000000000000007531265436462100162470ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) { int break_here = 1; (void)break_here; } static int var; static void* thread(void* unused) { var = 1337; return NULL; } int main(int argc, char* argv[]) { pthread_t t; breakpoint(); var = 42; pthread_create(&t, NULL, thread, NULL); pthread_join(t, NULL); atomic_printf("var=%d\n", var); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/watchpoint.py000066400000000000000000000022011265436462100164430ustar00rootroot00000000000000from rrutil import * send_gdb('break main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') # Test write watchpoint send_gdb('p &var') expect_gdb(r'\$1 = \(int \*\) ') send_gdb('watch *$1') expect_gdb('Hardware[()/a-z ]+watchpoint 2') send_gdb('c') expect_gdb('Old value = 0') expect_gdb('New value = 42') send_gdb('c') expect_gdb('Old value = 42') expect_gdb('New value = 1337') restart_replay() expect_gdb('Breakpoint 1') # Test read-write watchpoint send_gdb('delete 2') send_gdb('awatch *$1') expect_gdb('Hardware[()/a-z ]+watchpoint 3') send_gdb('c') expect_gdb('Old value = 0') expect_gdb('New value = 42') send_gdb('c') expect_gdb('Old value = 42') expect_gdb('New value = 1337') send_gdb('c') expect_gdb('Value = 1337') restart_replay() expect_gdb('Breakpoint 1') # Test read watchpoint. x86 treats these as read-write. 
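# (x86 debug registers have no read-only mode: DR7's R/W bits encode
# execute, write, I/O, and read-or-write, so gdb backs an rwatch with a
# read-write watchpoint and it fires on the writes below as well.)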
send_gdb('delete 3') send_gdb('rwatch *$1') expect_gdb('Hardware[()/a-z ]+watchpoint 4') send_gdb('c') expect_gdb('Value = 42') send_gdb('c') expect_gdb('Value = 1337') send_gdb('c') expect_gdb('Value = 1337') send_gdb('c') expect_rr('EXIT-SUCCESS') expect_gdb('exited normally') ok() rr-4.1.0/src/test/watchpoint.run000066400000000000000000000000471265436462100166250ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/watchpoint_syscall.c000066400000000000000000000013601265436462100177740ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static void breakpoint(void) {} static char* p; int main(int argc, char* argv[]) { int fd = open("/dev/zero", O_RDONLY); test_assert(fd >= 0); p = (char*)mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); test_assert(p != MAP_FAILED); breakpoint(); *p = 'a'; test_assert(1 == read(fd, p, 1)); test_assert(*p == 0); *p = 'b'; test_assert(p == mmap(p, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0)); test_assert(*p == 0); test_assert(0 == munmap(p, PAGE_SIZE)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/watchpoint_syscall.py000066400000000000000000000011741265436462100202050ustar00rootroot00000000000000from rrutil import * send_gdb('break breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('watch -l *p') expect_gdb('Hardware[()/a-z ]+watchpoint 2') send_gdb('c') expect_gdb('Old value = 0') expect_gdb('New value = 97') send_gdb('c') expect_gdb('Old value = 97') expect_gdb('New value = 0') send_gdb('c') expect_gdb('Old value = 0') expect_gdb('New value = 98') send_gdb('c') expect_gdb('Old value = 98') expect_gdb('New value = 0') send_gdb('c') expect_gdb('Old value = 0') expect_gdb('New value = ') send_gdb('c') expect_rr('EXIT-SUCCESS') expect_gdb('exited normally') ok() rr-4.1.0/src/test/watchpoint_syscall.run000066400000000000000000000000471265436462100203570ustar00rootroot00000000000000source `dirname $0`/util.sh debug_test rr-4.1.0/src/test/watchpoint_unaligned.c000066400000000000000000000007701265436462100202740ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static uint16_t* p2; static uint32_t* p4; static uint64_t* p8; static void breakpoint(void) {} int main(int argc, char* argv[]) { char* m = malloc(0x1000); void* unaligned_p = (void*)((uintptr_t)m | 0xff); p2 = unaligned_p; p4 = unaligned_p; p8 = unaligned_p; breakpoint(); *p2 = 1; breakpoint(); *p4 = 2; breakpoint(); *p8 = 3; atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/watchpoint_unaligned.py000066400000000000000000000010771265436462100205030ustar00rootroot00000000000000from rrutil import * send_gdb('b breakpoint') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('watch -l *p2') expect_gdb('Hardware watchpoint 2') send_gdb('c') expect_gdb('watchpoint 2') send_gdb('delete 2') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('watch -l *p4') expect_gdb('Hardware watchpoint 3') send_gdb('c') expect_gdb('watchpoint 3') send_gdb('delete 3') send_gdb('c') expect_gdb('Breakpoint 1') send_gdb('watch -l *p8') expect_gdb('Hardware watchpoint 4') send_gdb('c') expect_gdb('watchpoint 4') send_gdb('delete 4') ok() rr-4.1.0/src/test/watchpoint_unaligned.run000066400000000000000000000000471265436462100206530ustar00rootroot00000000000000source `dirname $0`/util.sh 
debug_test rr-4.1.0/src/test/when.py000066400000000000000000000016111265436462100152300ustar00rootroot00000000000000from rrutil import * import re send_gdb('when') expect_gdb(re.compile(r'Current event: (\d+)')) t = eval(last_match().group(1)); if t < 1 or t > 10000: failed('ERROR in first "when"') send_gdb('when-ticks') expect_gdb(re.compile(r'Current tick: (\d+)')) ticks = eval(last_match().group(1)); if ticks != 0: failed('ERROR in first "when-ticks"') send_gdb('b main') expect_gdb('Breakpoint 1') send_gdb('c') send_gdb('when') expect_gdb(re.compile(r'Current event: (\d+)')) t2 = eval(last_match().group(1)); if t2 < 1 or t2 > 10000: failed('ERROR in second "when"') if t2 <= t: failed('ERROR ... "when" failed to advance') send_gdb('when-ticks') expect_gdb(re.compile(r'Current tick: (\d+)')) ticks2 = eval(last_match().group(1)); if ticks2 < 0 or ticks2 > 100000: failed('ERROR in second "when-ticks"') if ticks2 <= ticks: failed('ERROR ... "when-ticks" failed to advance') ok() rr-4.1.0/src/test/when.run000066400000000000000000000000751265436462100154070ustar00rootroot00000000000000source `dirname $0`/util.sh record simple$bitness debug when rr-4.1.0/src/test/write_race.c000066400000000000000000000010761265436462100162120ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" #define NUM_PROCESSES 4 #define NUM_ITERATIONS 500 int main(int argc, char* argv[]) { int i; int j; for (i = 0; i < NUM_PROCESSES; ++i) { if (0 == fork()) { for (j = 0; j < NUM_ITERATIONS; ++j) { char buf[1000]; sprintf(buf, "Child %d writing line %d\n", i, j); write(1, buf, strlen(buf)); } return 0; } } for (i = 0; i < NUM_PROCESSES; ++i) { wait(NULL); } atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/writev.c000066400000000000000000000020701265436462100154010ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static char data[10] = "0123456789"; static void test(int use_pwritev) { char name[] = "/tmp/rr-readv-XXXXXX"; int fd = mkstemp(name); struct { char ch[50]; } * buf; struct iovec iovs[2]; ssize_t nwritten; test_assert(fd >= 0); test_assert(0 == unlink(name)); iovs[0].iov_base = data; iovs[0].iov_len = 7; iovs[1].iov_base = data + iovs[0].iov_len; iovs[1].iov_len = sizeof(data) - iovs[0].iov_len; if (use_pwritev) { /* Work around busted pwritev prototype in older libcs */ nwritten = syscall(SYS_pwritev, fd, iovs, 2, 0, 0); } else { nwritten = writev(fd, iovs, 2); } test_assert(sizeof(data) == nwritten); ALLOCATE_GUARD(buf, 'x'); test_assert(sizeof(data) == pread(fd, buf, sizeof(*buf), 0)); test_assert(0 == memcmp(buf, data, sizeof(data))); test_assert(buf->ch[sizeof(data)] == 'x'); VERIFY_GUARD(buf); } int main(int argc, char* argv[]) { test(0); test(1); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/xattr.c000066400000000000000000000031731265436462100152300ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" static const char attr_name[] = "user.testAttr"; static const char attr_value[] = "hello kitty"; int main(int argc, char* argv[]) { char path[PATH_MAX]; const char* home = getenv("HOME"); int fd; int ret; snprintf(path, sizeof(path), "%s/rr-xattr-XXXXXX", home ? 
home : "/tmp"); path[sizeof(path) - 1] = 0; fd = mkstemp(path); test_assert(0 <= fd); ret = setxattr(path, attr_name, attr_value, sizeof(attr_value), XATTR_CREATE); if (ret < 0 && errno == EOPNOTSUPP) { atomic_printf("Extended attributes not supported on file %s, " "skipping test\n", path); } else { char buf[sizeof(attr_value) + 1]; test_assert(ret == 0); memset(buf, '-', sizeof(buf)); ret = fgetxattr(fd, attr_name, buf, sizeof(buf) - 5); test_assert(ret == -1); test_assert(errno == ERANGE); test_assert(buf[0] == '-'); ret = fsetxattr(fd, attr_name, attr_value, sizeof(attr_value), XATTR_REPLACE); test_assert(ret == 0); ret = getxattr(path, attr_name, buf, sizeof(buf)); test_assert(ret == sizeof(attr_value)); test_assert(0 == memcmp(attr_value, buf, sizeof(attr_value))); test_assert(buf[sizeof(attr_value)] == '-'); ret = fremovexattr(fd, attr_name); test_assert(ret == 0); memset(buf, '-', sizeof(buf)); ret = getxattr(path, attr_name, buf, sizeof(buf)); test_assert(ret == -1); test_assert(errno == ENODATA); test_assert(buf[0] == '-'); } test_assert(0 == unlink(path)); atomic_puts("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/test/zero_length_read.c000066400000000000000000000004261265436462100173770ustar00rootroot00000000000000/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "rrutil.h" int main(int argc, char* argv[]) { char buf[1024]; ssize_t count = read(STDIN_FILENO, &buf[0], 0); test_assert(count == 0); atomic_printf("EXIT-SUCCESS"); return 0; } rr-4.1.0/src/util.cc000066400000000000000000000533741265436462100142370ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ //#define DEBUGTAG "Util" //#define FIRST_INTERESTING_EVENT 10700 //#define LAST_INTERESTING_EVENT 10900 #include "util.h" #include #include #include #include #include #include #include #include #include #include #include #include #include "preload/preload_interface.h" #include "AddressSpace.h" #include "AutoRemoteSyscalls.h" #include "Flags.h" #include "kernel_abi.h" #include "kernel_metadata.h" #include "log.h" #include "ReplaySession.h" #include "seccomp-bpf.h" #include "task.h" #include "TraceStream.h" using namespace std; using namespace rr; // FIXME this function assumes that there's only one address space. // Should instead only look at the address space of the task in // question. static bool is_start_of_scratch_region(Task* t, remote_ptr start_addr) { for (auto& kv : t->session().tasks()) { Task* c = kv.second; if (start_addr == c->scratch_ptr) { return true; } } return false; } bool probably_not_interactive(int fd) { /* Eminently tunable heuristic, but this is guaranteed to be * true during rr unit tests, where we care most about this * check (to a first degree). A failing test shouldn't * hang. */ return !isatty(fd); } int clone_flags_to_task_flags(int flags_arg) { int flags = CLONE_SHARE_NOTHING; // See task.h for description of the flags. flags |= (CLONE_CHILD_CLEARTID & flags_arg) ? CLONE_CLEARTID : 0; flags |= (CLONE_SETTLS & flags_arg) ? CLONE_SET_TLS : 0; flags |= (CLONE_SIGHAND & flags_arg) ? CLONE_SHARE_SIGHANDLERS : 0; flags |= (CLONE_THREAD & flags_arg) ? CLONE_SHARE_TASK_GROUP : 0; flags |= (CLONE_VM & flags_arg) ? CLONE_SHARE_VM : 0; flags |= (CLONE_FILES & flags_arg) ? 
CLONE_SHARE_FILES : 0; return flags; } size_t page_size() { return sysconf(_SC_PAGE_SIZE); } size_t ceil_page_size(size_t sz) { size_t page_mask = ~(page_size() - 1); return (sz + page_size() - 1) & page_mask; } size_t floor_page_size(size_t sz) { size_t page_mask = ~(page_size() - 1); return sz & page_mask; } remote_ptr ceil_page_size(remote_ptr addr) { return remote_ptr(ceil_page_size(addr.as_int())); } remote_ptr floor_page_size(remote_ptr addr) { return remote_ptr(floor_page_size(addr.as_int())); } /** * Dump |buf_len| words in |buf| to |out|, starting with a line * containing |label|. See |dump_binary_data()| for a description of * the remaining parameters. */ static void dump_binary_chunk(FILE* out, const char* label, const uint32_t* buf, size_t buf_len, remote_ptr start_addr) { int i; fprintf(out, "%s\n", label); for (i = 0; i < ssize_t(buf_len); i += 1) { uint32_t word = buf[i]; fprintf(out, "0x%08x | [%p]\n", word, (void*)(start_addr.as_int() + i * sizeof(*buf))); } } void dump_binary_data(const char* filename, const char* label, const uint32_t* buf, size_t buf_len, remote_ptr start_addr) { FILE* out = fopen64(filename, "w"); if (!out) { return; } dump_binary_chunk(out, label, buf, buf_len, start_addr); fclose(out); } void format_dump_filename(Task* t, TraceFrame::Time global_time, const char* tag, char* filename, size_t filename_size) { snprintf(filename, filename_size - 1, "%s/%d_%d_%s", t->trace_dir().c_str(), t->rec_tid, global_time, tag); } bool should_dump_memory(Task* t, const TraceFrame& f) { const Flags* flags = &Flags::get(); #if defined(FIRST_INTERESTING_EVENT) int is_syscall_exit = event >= 0 && state == STATE_SYSCALL_EXIT; if (is_syscall_exit && RECORD == Flags->option && FIRST_INTERESTING_EVENT <= global_time && global_time <= LAST_INTERESTING_EVENT) { return true; } if (global_time > LAST_INTERESTING_EVENT) { return false; } #endif return flags->dump_on == Flags::DUMP_ON_ALL || flags->dump_at == int(f.time()); } void dump_process_memory(Task* t, TraceFrame::Time global_time, const char* tag) { char filename[PATH_MAX]; FILE* dump_file; format_dump_filename(t, global_time, tag, filename, sizeof(filename)); dump_file = fopen64(filename, "w"); const AddressSpace& as = *(t->vm()); for (auto m : as.maps()) { vector mem; mem.resize(m.map.size()); ssize_t mem_len = t->read_bytes_fallible(m.map.start(), m.map.size(), mem.data()); mem_len = max(ssize_t(0), mem_len); if (!is_start_of_scratch_region(t, m.map.start())) { dump_binary_chunk(dump_file, m.map.str().c_str(), (const uint32_t*)mem.data(), mem_len / sizeof(uint32_t), m.map.start()); } } fclose(dump_file); } static void notify_checksum_error(Task* t, TraceFrame::Time global_time, unsigned checksum, unsigned rec_checksum, const string& raw_map_line) { char cur_dump[PATH_MAX]; char rec_dump[PATH_MAX]; dump_process_memory(t, global_time, "checksum_error"); /* TODO: if the right recorder memory dump is present, * automatically compare them, taking the oddball * not-mapped-during-replay region(s) into account. And if * not present, tell the user how to make one in a future * run. 
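* (dump_binary_chunk() writes one "0xValue | [0xAddr]" line per word,
* so two dumps of the same region line up cell-by-cell under the plain
* textual diff that the message below suggests.)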
*/ format_dump_filename(t, global_time, "checksum_error", cur_dump, sizeof(cur_dump)); format_dump_filename(t, global_time, "rec", rec_dump, sizeof(rec_dump)); const Event& ev = t->current_trace_frame().event(); ASSERT(t, checksum == rec_checksum) << "Divergence in contents of memory segment after '" << ev << "':\n" "\n" << raw_map_line << " (recorded checksum:" << HEX(rec_checksum) << "; replaying checksum:" << HEX(checksum) << ")\n" "\n" << "Dumped current memory contents to " << cur_dump << ". If you've created a memory dump for\n" << "the '" << ev << "' event (line " << t->trace_time() << ") during recording by using, for example with\n" << "the args\n" "\n" << "$ rr --dump-at=" << t->trace_time() << " record ...\n" "\n" << "then you can use the following to determine which memory cells " "differ:\n" "\n" << "$ diff -u " << rec_dump << " " << cur_dump << " > mem-diverge.diff\n"; } /** * This helper does the heavy lifting of storing or validating * checksums. The iterator data determines which behavior the helper * function takes on, and to/from which file it writes/read. */ enum ChecksumMode { STORE_CHECKSUMS, VALIDATE_CHECKSUMS }; struct checksum_iterator_data { ChecksumMode mode; FILE* checksums_file; TraceFrame::Time global_time; }; static bool checksum_segment_filter(const AddressSpace::Mapping& m) { struct stat st; int may_diverge; if (stat(m.map.fsname().c_str(), &st)) { /* If there's no persistent resource backing this * mapping, we should expect it to change. */ LOG(debug) << "CHECKSUMMING unlinked '" << m.map.fsname() << "'"; return true; } /* If we're pretty sure the backing resource is effectively * immutable, skip checksumming, it's a waste of time. Except * if the mapping is mutable, for example the rw data segment * of a system library, then it's interesting. */ may_diverge = should_copy_mmap_region(m.map, st) || (PROT_WRITE & m.map.prot()); LOG(debug) << (may_diverge ? "CHECKSUMMING" : " skipping") << " '" << m.map.fsname() << "'"; return may_diverge; } /** * Either create and store checksums for each segment mapped in |t|'s * address space, or validate an existing computed checksum. Behavior * is selected by |mode|. */ static void iterate_checksums(Task* t, ChecksumMode mode, TraceFrame::Time global_time) { struct checksum_iterator_data c; memset(&c, 0, sizeof(c)); char filename[PATH_MAX]; const char* fmode = (STORE_CHECKSUMS == mode) ? "w" : "r"; c.mode = mode; snprintf(filename, sizeof(filename) - 1, "%s/%d_%d", t->trace_dir().c_str(), global_time, t->rec_tid); c.checksums_file = fopen64(filename, fmode); c.global_time = global_time; if (!c.checksums_file) { FATAL() << "Failed to open checksum file " << filename; } const AddressSpace& as = *(t->vm()); for (auto m : as.maps()) { vector mem; ssize_t valid_mem_len = 0; if (checksum_segment_filter(m)) { mem.resize(m.map.size()); valid_mem_len = t->read_bytes_fallible(m.map.start(), m.map.size(), mem.data()); valid_mem_len = max(ssize_t(0), valid_mem_len); } unsigned* buf = (unsigned*)mem.data(); unsigned checksum = 0; int i; if (m.map.fsname().find(SYSCALLBUF_SHMEM_PATH_PREFIX) == 0) { /* The syscallbuf consists of a region that's written * deterministically wrt the trace events, and a * region that's written nondeterministically in the * same way as trace scratch buffers. The * deterministic region comprises committed syscallbuf * records, and possibly the one pending record * metadata. The nondeterministic region starts at * the "extra data" for the possibly one pending * record. 
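* (Concretely, the deterministic prefix measured below is sizeof(hdr)
* plus hdr.num_rec_bytes plus one record's worth of metadata.)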
* * So here, we set things up so that we only checksum * the deterministic region. */ auto child_hdr = m.map.start().cast(); auto hdr = t->read_mem(child_hdr); valid_mem_len = !buf ? 0 : sizeof(hdr) + hdr.num_rec_bytes + sizeof(struct syscallbuf_record); } ASSERT(t, buf || valid_mem_len == 0); for (i = 0; i < ssize_t(valid_mem_len / sizeof(*buf)); ++i) { checksum += buf[i]; } string raw_map_line = m.map.str(); if (STORE_CHECKSUMS == c.mode) { fprintf(c.checksums_file, "(%x) %s\n", checksum, raw_map_line.c_str()); } else { char line[1024]; unsigned rec_checksum; unsigned long rec_start; unsigned long rec_end; int nparsed; fgets(line, sizeof(line), c.checksums_file); nparsed = sscanf(line, "(%x) %lx-%lx", &rec_checksum, &rec_start, &rec_end); remote_ptr rec_start_addr = rec_start; remote_ptr rec_end_addr = rec_end; ASSERT(t, 3 == nparsed) << "Only parsed " << nparsed << " items"; ASSERT(t, rec_start_addr == m.map.start() && rec_end_addr == m.map.end()) << "Segment " << rec_start_addr << "-" << rec_end_addr << " changed to " << m.map << "??"; if (is_start_of_scratch_region(t, rec_start_addr)) { /* Replay doesn't touch scratch regions, so * their contents are allowed to diverge. * Tracees can't observe those segments unless * they do something sneaky (or disastrously * buggy). */ LOG(debug) << "Not validating scratch starting at 0x" << hex << rec_start_addr << dec; continue; } if (checksum != rec_checksum) { notify_checksum_error(t, c.global_time, checksum, rec_checksum, raw_map_line.c_str()); } } } fclose(c.checksums_file); } bool should_checksum(Task* t, const TraceFrame& f) { int checksum = Flags::get().checksum; bool is_syscall_exit = EV_SYSCALL == f.event().type() && EXITING_SYSCALL == f.event().Syscall().state; #if defined(FIRST_INTERESTING_EVENT) if (is_syscall_exit && FIRST_INTERESTING_EVENT <= global_time && global_time <= LAST_INTERESTING_EVENT) { return true; } if (global_time > LAST_INTERESTING_EVENT) { return false; } #endif if (Flags::CHECKSUM_NONE == checksum) { return false; } if (Flags::CHECKSUM_ALL == checksum) { return true; } if (Flags::CHECKSUM_SYSCALL == checksum) { return is_syscall_exit; } /* |checksum| is a global time point. */ return checksum <= int(f.time()); } void checksum_process_memory(Task* t, TraceFrame::Time global_time) { iterate_checksums(t, STORE_CHECKSUMS, global_time); } void validate_process_memory(Task* t, TraceFrame::Time global_time) { iterate_checksums(t, VALIDATE_CHECKSUMS, global_time); } signal_action default_action(int sig) { if (32 <= sig && sig <= 64) { return TERMINATE; } switch (sig) { /* TODO: SSoT for signal defs/semantics. 
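(Until then, the table below is intended to mirror the default
dispositions documented in signal(7).)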
*/ #define CASE(_sig, _act) \ case SIG##_sig: \ return _act CASE(HUP, TERMINATE); CASE(INT, TERMINATE); CASE(QUIT, DUMP_CORE); CASE(ILL, DUMP_CORE); CASE(ABRT, DUMP_CORE); CASE(FPE, DUMP_CORE); CASE(KILL, TERMINATE); CASE(SEGV, DUMP_CORE); CASE(PIPE, TERMINATE); CASE(ALRM, TERMINATE); CASE(TERM, TERMINATE); CASE(USR1, TERMINATE); CASE(USR2, TERMINATE); CASE(CHLD, IGNORE); CASE(CONT, CONTINUE); CASE(STOP, STOP); CASE(TSTP, STOP); CASE(TTIN, STOP); CASE(TTOU, STOP); CASE(BUS, DUMP_CORE); /*CASE(POLL, TERMINATE);*/ CASE(PROF, TERMINATE); CASE(SYS, DUMP_CORE); CASE(TRAP, DUMP_CORE); CASE(URG, IGNORE); CASE(VTALRM, TERMINATE); CASE(XCPU, DUMP_CORE); CASE(XFSZ, DUMP_CORE); /*CASE(IOT, DUMP_CORE);*/ /*CASE(EMT, TERMINATE);*/ CASE(STKFLT, TERMINATE); CASE(IO, TERMINATE); CASE(PWR, TERMINATE); /*CASE(LOST, TERMINATE);*/ CASE(WINCH, IGNORE); default: FATAL() << "Unknown signal " << sig; return TERMINATE; // not reached #undef CASE } } SignalDeterministic is_deterministic_signal(const siginfo_t& si) { switch (si.si_signo) { /* These signals may be delivered deterministically; * we'll check for sure below. */ case SIGILL: case SIGTRAP: case SIGBUS: case SIGFPE: case SIGSEGV: /* As bits/siginfo.h documents, * * Values for `si_code'. Positive values are * reserved for kernel-generated signals. * * So if the signal is maybe-synchronous, and the * kernel delivered it, then it must have been * delivered deterministically. */ return si.si_code > 0 ? DETERMINISTIC_SIG : NONDETERMINISTIC_SIG; default: /* All other signals can never be delivered * deterministically (to the approximation required by * rr). */ return NONDETERMINISTIC_SIG; } } static bool has_fs_name(const string& path) { struct stat dummy; return 0 == stat(path.c_str(), &dummy); } static bool is_tmp_file(const string& path) { struct statfs sfs; statfs(path.c_str(), &sfs); return (TMPFS_MAGIC == sfs.f_type // In observed configurations of Ubuntu 13.10, /tmp is // a folder in the / fs, not a separate tmpfs. || path.c_str() == strstr(path.c_str(), "/tmp/")); } bool should_copy_mmap_region(const KernelMapping& mapping, const struct stat& stat) { int flags = mapping.flags(); int prot = mapping.prot(); const string& file_name = mapping.fsname(); bool private_mapping = (flags & MAP_PRIVATE); // TODO: handle mmap'd files that are unlinked during // recording. if (!has_fs_name(file_name)) { LOG(debug) << " copying unlinked file"; return true; } if (is_tmp_file(file_name)) { LOG(debug) << " copying file on tmpfs"; return true; } if (file_name == "/etc/ld.so.cache") { // This file changes on almost every system update so we should copy it. LOG(debug) << " copying " << file_name; return true; } if (private_mapping && (prot & PROT_EXEC)) { /* Be optimistic about private executable mappings */ LOG(debug) << " (no copy for +x private mapping " << file_name << ")"; return false; } if (private_mapping && (0111 & stat.st_mode)) { /* A private mapping of an executable file usually * indicates mapping data sections of object files. * Since we're already assuming those change very * infrequently, we can avoid copying the data * sections too. */ LOG(debug) << " (no copy for private mapping of +x " << file_name << ")"; return false; } // TODO: using "can the euid of the rr process write this // file" as an approximation of whether the tracee can write // the file. If the tracee is messing around with // set*[gu]id(), the real answer may be different. 
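// (Note also that access(2) performs its check with the *real* uid/gid
// of this process, not the effective ones, which adds yet another
// layer of approximation.)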
bool can_write_file = (0 == access(file_name.c_str(), W_OK)); if (!can_write_file && 0 == stat.st_uid) { // We would like to assert this, but on Ubuntu 13.10, // the file /lib/i386-linux-gnu/libdl-2.17.so is // writeable by root for unknown reasons. // assert(!(prot & PROT_WRITE)); /* Mapping a file owned by root: we don't care if this * was a PRIVATE or SHARED mapping, because unless the * program is disastrously buggy or unlucky, the * mapping is effectively PRIVATE. Bad luck can come * from this program running during a system update, * or a user being added, which is probably less * frequent than even system updates. * * XXX what about the fontconfig cache files? */ LOG(debug) << " (no copy for root-owned " << file_name << ")"; return false; } if (private_mapping) { /* Some programs (at least Firefox) have been observed * to use cache files that are expected to be * consistent and unchanged during the bulk of * execution, but may be destroyed or mutated at * shutdown in preparation for the next session. We * don't otherwise know what to do with private * mappings, so err on the safe side. * * TODO: could get into dirty heuristics here like * trying to match "cache" in the filename ... */ LOG(debug) << " copying private mapping of non-system -x " << file_name; return true; } if (!(0222 & stat.st_mode)) { /* We couldn't write the file because it's read only. * But it's not a root-owned file (therefore not a * system file), so it's likely that it could be * temporary. Copy it. */ LOG(debug) << " copying read-only, non-system file"; return true; } if (!can_write_file) { /* mmap'ing another user's (non-system) files? Highly * irregular ... */ FATAL() << "Unhandled mmap " << file_name << "(prot:" << HEX(prot) << ((flags & MAP_SHARED) ? ";SHARED" : "") << "); uid:" << stat.st_uid << " mode:" << stat.st_mode; } return true; } void resize_shmem_segment(ScopedFd& fd, uint64_t num_bytes) { if (ftruncate(fd, num_bytes)) { FATAL() << "Failed to resize shmem to " << num_bytes; } } void cpuid(int code, int subrequest, unsigned int* a, unsigned int* c, unsigned int* d) { asm volatile("cpuid" : "=a"(*a), "=c"(*c), "=d"(*d) : "a"(code), "c"(subrequest) : "ebx"); } template static void extract_clone_parameters_arch(const Registers& regs, remote_ptr* stack, remote_ptr* parent_tid, remote_ptr* tls, remote_ptr* child_tid) { switch (Arch::clone_parameter_ordering) { case Arch::FlagsStackParentTLSChild: if (stack) { *stack = regs.arg2(); } if (parent_tid) { *parent_tid = regs.arg3(); } if (tls) { *tls = regs.arg4(); } if (child_tid) { *child_tid = regs.arg5(); } break; case Arch::FlagsStackParentChildTLS: if (stack) { *stack = regs.arg2(); } if (parent_tid) { *parent_tid = regs.arg3(); } if (child_tid) { *child_tid = regs.arg4(); } if (tls) { *tls = regs.arg5(); } break; } } void extract_clone_parameters(Task* t, remote_ptr* stack, remote_ptr* parent_tid, remote_ptr* tls, remote_ptr* child_tid) { RR_ARCH_FUNCTION(extract_clone_parameters_arch, t->arch(), t->regs(), stack, parent_tid, tls, child_tid); } int read_elf_class(const string& filename) { ScopedFd fd(filename.c_str(), O_RDONLY); if (!fd.is_open()) { return NOT_ELF; } char elf_header[EI_CLASS + 1]; static const char magic[4] = { ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3 }; if (read(fd, elf_header, sizeof(elf_header)) != sizeof(elf_header) || memcmp(magic, elf_header, sizeof(magic)) != 0) { return NOT_ELF; } return elf_header[EI_CLASS]; } // Setting these causes us to trace instructions after // instruction_trace_at_event_start up to and including // 
instruction_trace_at_event_last static TraceFrame::Time instruction_trace_at_event_start = 0; static TraceFrame::Time instruction_trace_at_event_last = 0; bool trace_instructions_up_to_event(TraceFrame::Time event) { return event > instruction_trace_at_event_start && event <= instruction_trace_at_event_last; } void dump_task_set(const set& tasks) { printf("["); for (auto& t : tasks) { printf("%p (pid=%d, rec=%d),", t, t->tid, t->rec_tid); } printf("]\n"); } void dump_task_map(const map& tasks) { printf("["); for (auto& t : tasks) { printf("%p (pid=%d, rec=%d),", t.second, t.second->tid, t.second->rec_tid); } printf("]\n"); } string real_path(const string& path) { char buf[PATH_MAX]; if (realpath(path.c_str(), buf) == buf) { return string(buf); } return path; } string exe_directory() { string exe_path = real_path("/proc/self/exe"); int end = exe_path.length(); // Chop off the filename while (end > 0 && exe_path[end - 1] != '/') { --end; } exe_path.erase(end); return exe_path; } rr-4.1.0/src/util.h000066400000000000000000000157401265436462100140740ustar00rootroot00000000000000/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #ifndef RR_UTIL_H_ #define RR_UTIL_H_ #include #include #include #include #include "Event.h" #include "remote_ptr.h" #include "ScopedFd.h" #include "TraceFrame.h" /* * This file is a dumping ground for functionality that needs to be shared but * has no other obvious place to go. * * We should minimize the amount of code here. Code that's only needed in one * place can move out of this file. */ class KernelMapping; class Task; class TraceFrame; template constexpr size_t array_length(T(&array)[N]) { return N; } template constexpr size_t array_length(std::array& array) { return N; } template T return_dummy_value() { T v; memset(&v, 1, sizeof(T)); return v; } template bool check_type_has_no_holes() { T v; memset(&v, 2, sizeof(T)); v = return_dummy_value(); return memchr(&v, 2, sizeof(T)) == NULL; } /** * Returns true when type T has no holes. Preferably should not be defined * at all otherwise. * This is not 100% reliable since the check_type_has_no_holes may be * compiled to copy holes. However, it has detected at least two bugs. */ template bool type_has_no_holes() { static bool check = check_type_has_no_holes(); return check; } #define SHMEM_FS "/dev/shm" /* The syscallbuf shared with tracees is created with this prefix * followed by the tracee tid, then immediately unlinked and shared * anonymously. */ #define SYSCALLBUF_SHMEM_PATH_PREFIX "/tmp/rr-tracee-shmem-" #define PREFIX_FOR_EMPTY_MMAPED_REGIONS "/tmp/rr-emptyfile-" enum Completion { COMPLETE, INCOMPLETE }; enum Switchable { PREVENT_SWITCH, ALLOW_SWITCH }; /** * Create a file named |filename| and dump |buf_len| words in |buf| to * that file, starting with a line containing |label|. |start_addr| * is the client address at which |buf| resides, if meaningful. * |start_addr| is used to compute the output lines of words, which * look like "0xValue | [0xAddr]". */ void dump_binary_data(const char* filename, const char* label, const uint32_t* buf, size_t buf_len, remote_ptr start_addr); /** * Format a suitable filename within the trace directory for dumping * information about |t| at the current global time, to a file that * contains |tag|. The constructed filename is returned through * |filename|. For example, a filengit logame for a task with tid 12345 at * time 111, for a file tagged "foo", would be something like * "trace_0/12345_111_foo". 
#define SHMEM_FS "/dev/shm"

/* The syscallbuf shared with tracees is created with this prefix
 * followed by the tracee tid, then immediately unlinked and shared
 * anonymously. */
#define SYSCALLBUF_SHMEM_PATH_PREFIX "/tmp/rr-tracee-shmem-"
#define PREFIX_FOR_EMPTY_MMAPED_REGIONS "/tmp/rr-emptyfile-"

enum Completion { COMPLETE, INCOMPLETE };

enum Switchable { PREVENT_SWITCH, ALLOW_SWITCH };

/**
 * Create a file named |filename| and dump |buf_len| words in |buf| to
 * that file, starting with a line containing |label|.  |start_addr|
 * is the client address at which |buf| resides, if meaningful.
 * |start_addr| is used to compute the output lines of words, which
 * look like "0xValue | [0xAddr]".
 */
void dump_binary_data(const char* filename, const char* label,
                      const uint32_t* buf, size_t buf_len,
                      remote_ptr<void> start_addr);

/**
 * Format a suitable filename within the trace directory for dumping
 * information about |t| at the current global time, to a file that
 * contains |tag|.  The constructed filename is returned through
 * |filename|.  For example, a filename for a task with tid 12345 at
 * time 111, for a file tagged "foo", would be something like
 * "trace_0/12345_111_foo".  The returned name is not guaranteed to be
 * unique, caveat emptor.
 */
void format_dump_filename(Task* t, TraceFrame::Time global_time,
                          const char* tag, char* filename,
                          size_t filename_size);

/**
 * Return true if the user requested memory be dumped for |t| at |f|.
 */
bool should_dump_memory(Task* t, const TraceFrame& f);

/**
 * Dump all of the memory in |t|'s address space to the file
 * "[trace_dir]/[t->tid]_[global_time]_[tag]".
 */
void dump_process_memory(Task* t, TraceFrame::Time global_time,
                         const char* tag);

/**
 * Return true if the user has requested |t|'s memory be
 * checksummed at |f|.
 */
bool should_checksum(Task* t, const TraceFrame& f);

/**
 * Write a checksum of each mapped region in |t|'s address space to a
 * special log, where it can be read by |validate_process_memory()|
 * during replay.
 */
void checksum_process_memory(Task* t, TraceFrame::Time global_time);

/**
 * Validate the checksum of |t|'s address space that was written
 * during recording.
 */
void validate_process_memory(Task* t, TraceFrame::Time global_time);

/**
 * Return true if the rr session is probably not interactive (that
 * is, there's probably no user watching or interacting with rr), and
 * so asking for user input or other actions is probably pointless.
 */
bool probably_not_interactive(int fd = STDERR_FILENO);

/**
 * Convert the flags passed to the clone() syscall, |flags_arg|, into
 * the format understood by Task::clone().
 */
int clone_flags_to_task_flags(int flags_arg);

/**
 * Return the argument rounded up to the nearest multiple of the
 * system |page_size()|.
 */
size_t ceil_page_size(size_t sz);
remote_ptr<void> ceil_page_size(remote_ptr<void> addr);

/**
 * Return the argument rounded down to the nearest multiple of the
 * system |page_size()|.
 */
size_t floor_page_size(size_t sz);
remote_ptr<void> floor_page_size(remote_ptr<void> addr);

/** Return the system page size. */
size_t page_size();

/** Return the default action of |sig|. */
enum signal_action { DUMP_CORE, TERMINATE, CONTINUE, STOP, IGNORE };
signal_action default_action(int sig);

SignalDeterministic is_deterministic_signal(const siginfo_t& si);

/**
 * Return true if |mapping|, whose mapped file has metadata |stat|,
 * should almost certainly be copied to the trace; i.e., the file
 * contents are likely to change in the interval between recording and
 * replay.  False is returned /if we think we can get away/ with not
 * copying the region.  That doesn't mean it's necessarily safe to
 * skip copying!
 */
bool should_copy_mmap_region(const KernelMapping& mapping,
                             const struct stat& stat);

/**
 * Ensure that the shmem segment referred to by |fd| has exactly the
 * size |num_bytes|.
 */
void resize_shmem_segment(ScopedFd& fd, uint64_t num_bytes);

enum cpuid_requests {
  CPUID_GETVENDORSTRING,
  CPUID_GETFEATURES,
  CPUID_GETTLB,
  CPUID_GETSERIAL,
  CPUID_GETXSAVE = 0x0D,
  CPUID_INTELEXTENDED = 0x80000000,
  CPUID_INTELFEATURES,
  CPUID_INTELBRANDSTRING,
  CPUID_INTELBRANDSTRINGMORE,
  CPUID_INTELBRANDSTRINGEND,
};

/**
 * Issue a single request to CPUID. Fits 'intel features', for instance.
 * Note that even if only "eax" and "edx" are of interest, other registers
 * will be modified by the operation, so we need to tell the compiler about it.
 * 'code' is placed in EAX. 'subrequest' is placed in ECX.
 * *a, *c and *d receive EAX, ECX and EDX respectively.
 */
void cpuid(int code, int subrequest, unsigned int* a, unsigned int* c,
           unsigned int* d);
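/* Editorial sketch, not part of the original header: testing a single
 * feature bit via cpuid() above. CPUID leaf 1 (CPUID_GETFEATURES)
 * reports SSE2 in EDX bit 26 (that much is architectural), but this
 * wrapper itself is hypothetical. */
inline bool example_cpu_has_sse2() {
  unsigned int eax, ecx, edx;
  cpuid(CPUID_GETFEATURES, 0, &eax, &ecx, &edx);
  return (edx & (1u << 26)) != 0;
}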
/**
 * Extract various clone(2) parameters out of the given Task's registers.
 * Each remote_ptr parameter may be nullptr.
 */
void extract_clone_parameters(Task* t, remote_ptr<void>* stack,
                              remote_ptr<int>* ptid, remote_ptr<void>* tls,
                              remote_ptr<int>* ctid);

/**
 * Read the ELF CLASS from the given filename. If the file can't be
 * read, or isn't an ELF file, return NOT_ELF.
 */
const int NOT_ELF = 0x10000;
int read_elf_class(const std::string& filename);

bool trace_instructions_up_to_event(TraceFrame::Time event);

/* Helpful for broken debuggers */

void dump_task_set(const std::set<Task*>& tasks);

void dump_task_map(const std::map<pid_t, Task*>& tasks);

std::string real_path(const std::string& path);

std::string exe_directory();

#endif /* RR_UTIL_H_ */
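/* Editorial sketch, not part of the original header (and deliberately
 * left outside the include guard, as illustration only): pulling just
 * the child-stack pointer out of a clone(2) entry. |t| is assumed to
 * be stopped at a clone syscall; outputs the caller doesn't need are
 * passed as nullptr, which the declaration above permits. The helper
 * name is hypothetical. */
inline remote_ptr<void> example_clone_child_stack(Task* t) {
  remote_ptr<void> stack;
  extract_clone_parameters(t, &stack, nullptr, nullptr, nullptr);
  return stack;
}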