IO-AIO-4.18/0000755000000000000000000000000012035451264011063 5ustar rootrootIO-AIO-4.18/configure.ac0000644000000000000000000000025711740732052013354 0ustar rootrootAC_INIT AC_CONFIG_SRCDIR([libeio/eio.h]) AC_CONFIG_HEADERS([libeio/config.h]) AC_PREREQ(2.60) AC_USE_SYSTEM_EXTENSIONS AC_PROG_CC m4_include([libeio/libeio.m4]) AC_OUTPUT IO-AIO-4.18/def0.h0000644000000000000000000001065212004015204012040 0ustar rootroot#ifndef ENOSYS #define ENOSYS 0 #endif #ifndef EXDEV #define EXDEV 0 #endif #ifndef EBADR #define EBADR 0 #endif #ifndef O_RDONLY #define O_RDONLY 0 #endif #ifndef O_WRONLY #define O_WRONLY 0 #endif #ifndef O_RDWR #define O_RDWR 0 #endif #ifndef O_CREAT #define O_CREAT 0 #endif #ifndef O_TRUNC #define O_TRUNC 0 #endif #ifndef O_EXCL #define O_EXCL 0 #endif #ifndef O_APPEND #define O_APPEND 0 #endif #ifndef O_ASYNC #define O_ASYNC 0 #endif #ifndef O_DIRECT #define O_DIRECT 0 #endif #ifndef O_NOATIME #define O_NOATIME 0 #endif #ifndef O_CLOEXEC #define O_CLOEXEC 0 #endif #ifndef O_NOCTTY #define O_NOCTTY 0 #endif #ifndef O_NOFOLLOW #define O_NOFOLLOW 0 #endif #ifndef O_NONBLOCK #define O_NONBLOCK 0 #endif #ifndef O_EXEC #define O_EXEC 0 #endif #ifndef O_SEARCH #define O_SEARCH 0 #endif #ifndef O_DIRECTORY #define O_DIRECTORY 0 #endif #ifndef O_DSYNC #define O_DSYNC 0 #endif #ifndef O_RSYNC #define O_RSYNC 0 #endif #ifndef O_SYNC #define O_SYNC 0 #endif #ifndef O_TTY_INIT #define O_TTY_INIT 0 #endif #ifndef S_IFIFO #define S_IFIFO 0 #endif #ifndef S_IFCHR #define S_IFCHR 0 #endif #ifndef S_IFBLK #define S_IFBLK 0 #endif #ifndef S_IFLNK #define S_IFLNK 0 #endif #ifndef S_IFREG #define S_IFREG 0 #endif #ifndef S_IFDIR #define S_IFDIR 0 #endif #ifndef S_IFWHT #define S_IFWHT 0 #endif #ifndef S_IFSOCK #define S_IFSOCK 0 #endif #ifndef S_IFMT #define S_IFMT 0 #endif #ifndef POSIX_FADV_NORMAL #define POSIX_FADV_NORMAL 0 #endif #ifndef POSIX_FADV_SEQUENTIAL #define POSIX_FADV_SEQUENTIAL 0 #endif #ifndef POSIX_FADV_RANDOM #define POSIX_FADV_RANDOM 0 #endif #ifndef 
POSIX_FADV_NOREUSE #define POSIX_FADV_NOREUSE 0 #endif #ifndef POSIX_FADV_WILLNEED #define POSIX_FADV_WILLNEED 0 #endif #ifndef POSIX_FADV_DONTNEED #define POSIX_FADV_DONTNEED 0 #endif #ifndef POSIX_MADV_NORMAL #define POSIX_MADV_NORMAL 0 #endif #ifndef POSIX_MADV_SEQUENTIAL #define POSIX_MADV_SEQUENTIAL 0 #endif #ifndef POSIX_MADV_RANDOM #define POSIX_MADV_RANDOM 0 #endif #ifndef POSIX_MADV_WILLNEED #define POSIX_MADV_WILLNEED 0 #endif #ifndef POSIX_MADV_DONTNEED #define POSIX_MADV_DONTNEED 0 #endif #ifndef ST_RDONLY #define ST_RDONLY 0 #endif #ifndef ST_NOSUID #define ST_NOSUID 0 #endif #ifndef ST_NODEV #define ST_NODEV 0 #endif #ifndef ST_NOEXEC #define ST_NOEXEC 0 #endif #ifndef ST_SYNCHRONOUS #define ST_SYNCHRONOUS 0 #endif #ifndef ST_MANDLOCK #define ST_MANDLOCK 0 #endif #ifndef ST_WRITE #define ST_WRITE 0 #endif #ifndef ST_APPEND #define ST_APPEND 0 #endif #ifndef ST_IMMUTABLE #define ST_IMMUTABLE 0 #endif #ifndef ST_NOATIME #define ST_NOATIME 0 #endif #ifndef ST_NODIRATIME #define ST_NODIRATIME 0 #endif #ifndef ST_RELATIME #define ST_RELATIME 0 #endif #ifndef PROT_NONE #define PROT_NONE 0 #endif #ifndef PROT_EXEC #define PROT_EXEC 0 #endif #ifndef PROT_READ #define PROT_READ 0 #endif #ifndef PROT_WRITE #define PROT_WRITE 0 #endif #ifndef MAP_PRIVATE #define MAP_PRIVATE 0 #endif #ifndef MAP_SHARED #define MAP_SHARED 0 #endif #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS 0 #endif #ifndef MAP_HUGETLB #define MAP_HUGETLB 0 #endif #ifndef MAP_LOCKED #define MAP_LOCKED 0 #endif #ifndef MAP_NORESERVE #define MAP_NORESERVE 0 #endif #ifndef MAP_POPULATE #define MAP_POPULATE 0 #endif #ifndef MAP_NONBLOCK #define MAP_NONBLOCK 0 #endif #ifndef FIEMAP_FLAG_SYNC #define FIEMAP_FLAG_SYNC 0 #endif #ifndef FIEMAP_FLAG_XATTR #define FIEMAP_FLAG_XATTR 0 #endif #ifndef FIEMAP_FLAGS_COMPAT #define FIEMAP_FLAGS_COMPAT 0 #endif #ifndef FIEMAP_EXTENT_LAST #define FIEMAP_EXTENT_LAST 0 #endif #ifndef FIEMAP_EXTENT_UNKNOWN #define FIEMAP_EXTENT_UNKNOWN 0 #endif #ifndef 
FIEMAP_EXTENT_DELALLOC #define FIEMAP_EXTENT_DELALLOC 0 #endif #ifndef FIEMAP_EXTENT_ENCODED #define FIEMAP_EXTENT_ENCODED 0 #endif #ifndef FIEMAP_EXTENT_DATA_ENCRYPTED #define FIEMAP_EXTENT_DATA_ENCRYPTED 0 #endif #ifndef FIEMAP_EXTENT_NOT_ALIGNED #define FIEMAP_EXTENT_NOT_ALIGNED 0 #endif #ifndef FIEMAP_EXTENT_DATA_INLINE #define FIEMAP_EXTENT_DATA_INLINE 0 #endif #ifndef FIEMAP_EXTENT_DATA_TAIL #define FIEMAP_EXTENT_DATA_TAIL 0 #endif #ifndef FIEMAP_EXTENT_UNWRITTEN #define FIEMAP_EXTENT_UNWRITTEN 0 #endif #ifndef FIEMAP_EXTENT_MERGED #define FIEMAP_EXTENT_MERGED 0 #endif #ifndef FIEMAP_EXTENT_SHARED #define FIEMAP_EXTENT_SHARED 0 #endif #ifndef SPLICE_F_MOVE #define SPLICE_F_MOVE 0 #endif #ifndef SPLICE_F_NONBLOCK #define SPLICE_F_NONBLOCK 0 #endif #ifndef SPLICE_F_MORE #define SPLICE_F_MORE 0 #endif #ifndef SPLICE_F_GIFT #define SPLICE_F_GIFT 0 #endif #ifndef SEEK_DATA #define SEEK_DATA 0 #endif #ifndef SEEK_HOLE #define SEEK_HOLE 0 #endif IO-AIO-4.18/libeio/0000755000000000000000000000000012035451264012326 5ustar rootrootIO-AIO-4.18/libeio/config.h.in0000644000000000000000000000663011762471732014366 0ustar rootroot/* config.h.in. Generated from configure.ac by autoheader. */ /* Define to 1 if you have the <dlfcn.h> header file. */ #undef HAVE_DLFCN_H /* fdatasync(2) is available */ #undef HAVE_FDATASYNC /* futimes(2) is available */ #undef HAVE_FUTIMES /* Define to 1 if you have the <inttypes.h> header file. */ #undef HAVE_INTTYPES_H /* fallocate(2) is available */ #undef HAVE_LINUX_FALLOCATE /* Define to 1 if you have the <linux/fiemap.h> header file. */ #undef HAVE_LINUX_FIEMAP_H /* Define to 1 if you have the <linux/fs.h> header file. */ #undef HAVE_LINUX_FS_H /* splice/vmsplice/tee(2) are available */ #undef HAVE_LINUX_SPLICE /* Define to 1 if you have the <memory.h> header file. 
*/ #undef HAVE_MEMORY_H /* posix_fadvise(2) is available */ #undef HAVE_POSIX_FADVISE /* posix_madvise(2) is available */ #undef HAVE_POSIX_MADVISE /* prctl(PR_SET_NAME) is available */ #undef HAVE_PRCTL_SET_NAME /* readahead(2) is available (linux) */ #undef HAVE_READAHEAD /* sendfile(2) is available and supported */ #undef HAVE_SENDFILE /* Define to 1 if you have the <stdint.h> header file. */ #undef HAVE_STDINT_H /* Define to 1 if you have the <stdlib.h> header file. */ #undef HAVE_STDLIB_H /* Define to 1 if you have the <strings.h> header file. */ #undef HAVE_STRINGS_H /* Define to 1 if you have the <string.h> header file. */ #undef HAVE_STRING_H /* sync_file_range(2) is available */ #undef HAVE_SYNC_FILE_RANGE /* Define to 1 if you have the <sys/prctl.h> header file. */ #undef HAVE_SYS_PRCTL_H /* Define to 1 if you have the <sys/stat.h> header file. */ #undef HAVE_SYS_STAT_H /* syscall(__NR_syncfs) is available */ #undef HAVE_SYS_SYNCFS /* Define to 1 if you have the <sys/syscall.h> header file. */ #undef HAVE_SYS_SYSCALL_H /* Define to 1 if you have the <sys/types.h> header file. */ #undef HAVE_SYS_TYPES_H /* Define to 1 if you have the <unistd.h> header file. */ #undef HAVE_UNISTD_H /* utimes(2) is available */ #undef HAVE_UTIMES /* Define to the sub-directory in which libtool stores uninstalled libraries. */ #undef LT_OBJDIR /* Name of package */ #undef PACKAGE /* Define to the address where bug reports for this package should be sent. */ #undef PACKAGE_BUGREPORT /* Define to the full name of this package. */ #undef PACKAGE_NAME /* Define to the full name and version of this package. */ #undef PACKAGE_STRING /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME /* Define to the home page for this package. */ #undef PACKAGE_URL /* Define to the version of this package. */ #undef PACKAGE_VERSION /* Define to 1 if you have the ANSI C header files. */ #undef STDC_HEADERS /* Enable extensions on AIX 3, Interix. */ #ifndef _ALL_SOURCE # undef _ALL_SOURCE #endif /* Enable GNU extensions on systems that have them. 
*/ #ifndef _GNU_SOURCE # undef _GNU_SOURCE #endif /* Enable threading extensions on Solaris. */ #ifndef _POSIX_PTHREAD_SEMANTICS # undef _POSIX_PTHREAD_SEMANTICS #endif /* Enable extensions on HP NonStop. */ #ifndef _TANDEM_SOURCE # undef _TANDEM_SOURCE #endif /* Enable general extensions on Solaris. */ #ifndef __EXTENSIONS__ # undef __EXTENSIONS__ #endif /* Version number of package */ #undef VERSION /* Define to 1 if on MINIX. */ #undef _MINIX /* Define to 2 if the system does not provide POSIX.1 features except with this defined. */ #undef _POSIX_1_SOURCE /* Define to 1 if you need to in order for `stat' and other things to work. */ #undef _POSIX_SOURCE IO-AIO-4.18/libeio/xthread.h0000644000000000000000000001130012035072404014124 0ustar rootroot#ifndef XTHREAD_H_ #define XTHREAD_H_ /* whether word reads are potentially non-atomic. * this is conservative, likely most arches this runs * on have atomic word read/writes. */ #ifndef WORDACCESS_UNSAFE # if __i386 || __x86_64 # define WORDACCESS_UNSAFE 0 # else # define WORDACCESS_UNSAFE 1 # endif #endif ///////////////////////////////////////////////////////////////////////////// #ifdef _WIN32 #define NTDDI_VERSION NTDDI_WIN2K // needed to get win2000 api calls #define _WIN32_WINNT 0x400 #include //D #include #include #include #include #include #include #include #define sigset_t int #define sigfillset(a) #define pthread_sigmask(a,b,c) #define sigaddset(a,b) #define sigemptyset(s) typedef pthread_mutex_t xmutex_t; #define X_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER #define X_MUTEX_CREATE(mutex) pthread_mutex_init (&(mutex), 0) #define X_LOCK(mutex) pthread_mutex_lock (&(mutex)) #define X_UNLOCK(mutex) pthread_mutex_unlock (&(mutex)) typedef pthread_cond_t xcond_t; #define X_COND_INIT PTHREAD_COND_INITIALIZER #define X_COND_CREATE(cond) pthread_cond_init (&(cond), 0) #define X_COND_SIGNAL(cond) pthread_cond_signal (&(cond)) #define X_COND_WAIT(cond,mutex) pthread_cond_wait (&(cond), &(mutex)) #define 
X_COND_TIMEDWAIT(cond,mutex,to) pthread_cond_timedwait (&(cond), &(mutex), &(to)) typedef pthread_t xthread_t; #define X_THREAD_PROC(name) static void *name (void *thr_arg) #define X_THREAD_ATFORK(a,b,c) static int xthread_create (xthread_t *tid, void *(*proc)(void *), void *arg) { int retval; pthread_attr_t attr; pthread_attr_init (&attr); pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); retval = pthread_create (tid, &attr, proc, arg) == 0; pthread_attr_destroy (&attr); return retval; } #define respipe_read(a,b,c) PerlSock_recv ((a), (b), (c), 0) #define respipe_write(a,b,c) send ((a), (b), (c), 0) #define respipe_close(a) PerlSock_closesocket ((a)) #else ///////////////////////////////////////////////////////////////////////////// #if __linux && !defined(_GNU_SOURCE) # define _GNU_SOURCE #endif /* just in case */ #define _REENTRANT 1 #if __solaris # define _POSIX_PTHREAD_SEMANTICS 1 /* try to bribe solaris headers into providing a current pthread API * despite environment being configured for an older version. 
*/ # define __EXTENSIONS__ 1 #endif #include #include #include #include #include typedef pthread_mutex_t xmutex_t; #if __linux && defined (PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP) # define X_MUTEX_INIT PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP # define X_MUTEX_CREATE(mutex) \ do { \ pthread_mutexattr_t attr; \ pthread_mutexattr_init (&attr); \ pthread_mutexattr_settype (&attr, PTHREAD_MUTEX_ADAPTIVE_NP); \ pthread_mutex_init (&(mutex), &attr); \ } while (0) #else # define X_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER # define X_MUTEX_CREATE(mutex) pthread_mutex_init (&(mutex), 0) #endif #define X_LOCK(mutex) pthread_mutex_lock (&(mutex)) #define X_UNLOCK(mutex) pthread_mutex_unlock (&(mutex)) typedef pthread_cond_t xcond_t; #define X_COND_INIT PTHREAD_COND_INITIALIZER #define X_COND_CREATE(cond) pthread_cond_init (&(cond), 0) #define X_COND_SIGNAL(cond) pthread_cond_signal (&(cond)) #define X_COND_WAIT(cond,mutex) pthread_cond_wait (&(cond), &(mutex)) #define X_COND_TIMEDWAIT(cond,mutex,to) pthread_cond_timedwait (&(cond), &(mutex), &(to)) typedef pthread_t xthread_t; #define X_THREAD_PROC(name) static void *name (void *thr_arg) #define X_THREAD_ATFORK(prepare,parent,child) pthread_atfork (prepare, parent, child) // the broken bsd's once more #ifndef PTHREAD_STACK_MIN # define PTHREAD_STACK_MIN 0 #endif #ifndef X_STACKSIZE # define X_STACKSIZE sizeof (void *) * 4096 #endif static int xthread_create (xthread_t *tid, void *(*proc)(void *), void *arg) { int retval; sigset_t fullsigset, oldsigset; pthread_attr_t attr; pthread_attr_init (&attr); pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); pthread_attr_setstacksize (&attr, PTHREAD_STACK_MIN < X_STACKSIZE ? 
X_STACKSIZE : PTHREAD_STACK_MIN); #ifdef PTHREAD_SCOPE_PROCESS pthread_attr_setscope (&attr, PTHREAD_SCOPE_PROCESS); #endif sigfillset (&fullsigset); pthread_sigmask (SIG_SETMASK, &fullsigset, &oldsigset); retval = pthread_create (tid, &attr, proc, arg) == 0; pthread_sigmask (SIG_SETMASK, &oldsigset, 0); pthread_attr_destroy (&attr); return retval; } #define respipe_read(a,b,c) read ((a), (b), (c)) #define respipe_write(a,b,c) write ((a), (b), (c)) #define respipe_close(a) close ((a)) #endif #endif IO-AIO-4.18/libeio/eio.h0000644000000000000000000003763412012347250013262 0ustar rootroot/* * libeio API header * * Copyright (c) 2007,2008,2009,2010,2011,2012 Marc Alexander Lehmann * All rights reserved. * * Redistribution and use in source and binary forms, with or without modifica- * tion, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. 
* * Alternatively, the contents of this file may be used under the terms of * the GNU General Public License ("GPL") version 2 or any later version, * in which case the provisions of the GPL are applicable instead of * the above. If you wish to allow the use of your version of this file * only under the terms of the GPL and not to allow others to use your * version of this file under the BSD license, indicate your decision * by deleting the provisions above and replace them with the notice * and other provisions required by the GPL. If you do not delete the * provisions above, a recipient may use your version of this file under * either the BSD or the GPL. */ #ifndef EIO_H_ #define EIO_H_ #ifdef __cplusplus extern "C" { #endif #include #include #include typedef struct eio_req eio_req; typedef struct eio_dirent eio_dirent; typedef int (*eio_cb)(eio_req *req); #ifndef EIO_REQ_MEMBERS # define EIO_REQ_MEMBERS #endif #ifndef EIO_STRUCT_STAT # ifdef _WIN32 # define EIO_STRUCT_STAT struct _stati64 # define EIO_STRUCT_STATI64 # else # define EIO_STRUCT_STAT struct stat # endif #endif #ifdef _WIN32 typedef int eio_uid_t; typedef int eio_gid_t; #ifdef __MINGW32__ /* no intptr_t */ typedef ssize_t eio_ssize_t; #else typedef intptr_t eio_ssize_t; /* or SSIZE_T */ #endif #if __GNUC__ typedef long long eio_ino_t; /* signed for compatibility to msvc */ #else typedef __int64 eio_ino_t; /* unsigned not supported by msvc */ #endif #else typedef uid_t eio_uid_t; typedef gid_t eio_gid_t; typedef ssize_t eio_ssize_t; typedef ino_t eio_ino_t; #endif #ifndef EIO_STRUCT_STATVFS # define EIO_STRUCT_STATVFS struct statvfs #endif /* managing working directories */ typedef struct eio_pwd *eio_wd; #define EIO_CWD 0 /* the current working directory of the process, guaranteed to be a null pointer */ #define EIO_INVALID_WD ((eio_wd)(int)-1) /* failure return for eio_wd_open */ eio_wd eio_wd_open_sync (eio_wd wd, const char *path); void eio_wd_close_sync (eio_wd wd); /* for readdir */ /* 
eio_readdir flags */ enum { EIO_READDIR_DENTS = 0x01, /* ptr2 contains eio_dirents, not just the (unsorted) names */ EIO_READDIR_DIRS_FIRST = 0x02, /* dirents get sorted into a good stat()ing order to find directories first */ EIO_READDIR_STAT_ORDER = 0x04, /* dirents get sorted into a good stat()ing order to quickly stat all files */ EIO_READDIR_FOUND_UNKNOWN = 0x80, /* set by eio_readdir when *_ARRAY was set and any TYPE=UNKNOWN's were found; NOTE(review): no *_ARRAY flag is declared in this enum, presumably EIO_READDIR_DENTS is meant - confirm */ EIO_READDIR_CUSTOM1 = 0x100, /* for use by apps */ EIO_READDIR_CUSTOM2 = 0x200 /* for use by apps */ }; /* using "typical" values in the hope that the compiler will do something sensible */ enum eio_dtype { EIO_DT_UNKNOWN = 0, EIO_DT_FIFO = 1, EIO_DT_CHR = 2, EIO_DT_MPC = 3, /* multiplexed char device (v7+coherent) */ EIO_DT_DIR = 4, EIO_DT_NAM = 5, /* xenix special named file */ EIO_DT_BLK = 6, EIO_DT_MPB = 7, /* multiplexed block device (v7+coherent) */ EIO_DT_REG = 8, EIO_DT_NWK = 9, /* HP-UX network special */ EIO_DT_CMP = 9, /* VxFS compressed (same value as EIO_DT_NWK) */ EIO_DT_LNK = 10, /* DT_SHAD = 11,*/ EIO_DT_SOCK = 12, EIO_DT_DOOR = 13, /* solaris door */ EIO_DT_WHT = 14, EIO_DT_MAX = 15 /* highest DT_VALUE ever, hopefully */ }; struct eio_dirent { int nameofs; /* offset of null-terminated name string in (char *)req->ptr2 */ unsigned short namelen; /* size of filename without trailing 0 */ unsigned char type; /* one of EIO_DT_* */ signed char score; /* internal use */ eio_ino_t inode; /* the inode number, if available, otherwise unspecified */ }; /* eio_msync flags */ enum { EIO_MS_ASYNC = 1, EIO_MS_INVALIDATE = 2, EIO_MS_SYNC = 4 }; /* eio_mtouch flags */ enum { EIO_MT_MODIFY = 1 }; /* eio_sync_file_range flags */ enum { EIO_SYNC_FILE_RANGE_WAIT_BEFORE = 1, EIO_SYNC_FILE_RANGE_WRITE = 2, EIO_SYNC_FILE_RANGE_WAIT_AFTER = 4 }; /* eio_fallocate flags */ enum { /* these MUST match the value in linux/falloc.h */ EIO_FALLOC_FL_KEEP_SIZE = 1, EIO_FALLOC_FL_PUNCH_HOLE = 2 }; /* timestamps and differences - feel free to use double in your 
code directly */ typedef double eio_tstamp; /* the eio request types (stored in eio_req.type) */ enum { EIO_CUSTOM, EIO_WD_OPEN, EIO_WD_CLOSE, EIO_CLOSE, EIO_DUP2, EIO_SEEK, EIO_READ, EIO_WRITE, EIO_READAHEAD, EIO_SENDFILE, EIO_FSTAT, EIO_FSTATVFS, EIO_FTRUNCATE, EIO_FUTIME, EIO_FCHMOD, EIO_FCHOWN, EIO_SYNC, EIO_FSYNC, EIO_FDATASYNC, EIO_SYNCFS, EIO_MSYNC, EIO_MTOUCH, EIO_SYNC_FILE_RANGE, EIO_FALLOCATE, EIO_MLOCK, EIO_MLOCKALL, EIO_GROUP, EIO_NOP, EIO_BUSY, /* these use wd + ptr1, but are emulated */ EIO_REALPATH, EIO_READDIR, /* all the following requests use wd + ptr1 as path in xxxat functions */ EIO_OPEN, EIO_STAT, EIO_LSTAT, EIO_STATVFS, EIO_TRUNCATE, EIO_UTIME, EIO_CHMOD, EIO_CHOWN, EIO_UNLINK, EIO_RMDIR, EIO_MKDIR, EIO_RENAME, EIO_MKNOD, EIO_LINK, EIO_SYMLINK, EIO_READLINK, EIO_REQ_TYPE_NUM }; /* seek whence modes */ /* these are guaranteed to have the traditional 0, 1, 2 values, */ /* so you might as well use those */ enum { EIO_SEEK_SET = 0, EIO_SEEK_CUR = 1, EIO_SEEK_END = 2 }; /* mlockall constants */ enum { EIO_MCL_CURRENT = 1, EIO_MCL_FUTURE = 2 }; /* request priorities */ enum { EIO_PRI_MIN = -4, EIO_PRI_MAX = 4, EIO_PRI_DEFAULT = 0 }; /* eio request structure */ /* this structure is mostly read-only */ /* when initialising it, all members must be zero-initialised */ struct eio_req { eio_req volatile *next; /* private ETP */ eio_wd wd; /* all applicable requests: working directory of pathname, old name; wd_open: return wd */ eio_ssize_t result; /* result of syscall, e.g. result = read (... 
*/ off_t offs; /* read, write, truncate, readahead, sync_file_range, fallocate: file offset, mknod: dev_t */ size_t size; /* read, write, readahead, sendfile, msync, mlock, sync_file_range, fallocate: length */ void *ptr1; /* all applicable requests: pathname, old name; readdir: optional eio_dirents */ void *ptr2; /* all applicable requests: new name or memory buffer; readdir: name strings */ eio_tstamp nv1; /* utime, futime: atime; busy: sleep time */ eio_tstamp nv2; /* utime, futime: mtime */ int type; /* EIO_xxx constant ETP */ int int1; /* all applicable requests: file descriptor; sendfile: output fd; open, msync, mlockall, readdir: flags */ long int2; /* chown, fchown: uid; sendfile: input fd; open, chmod, mkdir, mknod: file mode, seek: whence, sync_file_range, fallocate: flags */ long int3; /* chown, fchown: gid; rename, link: working directory of new name */ int errorno; /* errno value on syscall return */ #if __i386 || __amd64 unsigned char cancelled; #else sig_atomic_t cancelled; #endif unsigned char flags; /* private */ signed char pri; /* the priority */ void *data; eio_cb finish; void (*destroy)(eio_req *req); /* called when request no longer needed */ void (*feed)(eio_req *req); /* only used for group requests */ EIO_REQ_MEMBERS eio_req *grp, *grp_prev, *grp_next, *grp_first; /* private */ }; /* _private_ request flags */ enum { EIO_FLAG_PTR1_FREE = 0x01, /* need to free(ptr1) */ EIO_FLAG_PTR2_FREE = 0x02, /* need to free(ptr2) */ EIO_FLAG_GROUPADD = 0x04 /* some request was added to the group */ }; /* undocumented/unsupported/private helper */ /*void eio_page_align (void **addr, size_t *length);*/ /* returns < 0 on error, errno set * want_poll, if non-zero, will be called when results are available * and eio_poll needs to be invoked (it MUST NOT call eio_poll itself). * done_poll is called when the need to poll is gone. 
*/ int eio_init (void (*want_poll)(void), void (*done_poll)(void)); /* must be called regularly to handle pending requests */ /* returns 0 if all requests were handled, -1 if not, or the value of EIO_FINISH if != 0 */ int eio_poll (void); /* stop polling if poll took longer than nseconds seconds */ void eio_set_max_poll_time (eio_tstamp nseconds); /* do not handle more than nreqs requests in one call to eio_poll */ void eio_set_max_poll_reqs (unsigned int nreqs); /* set minimum required number * maximum wanted number * or maximum idle number of threads */ void eio_set_min_parallel (unsigned int nthreads); void eio_set_max_parallel (unsigned int nthreads); void eio_set_max_idle (unsigned int nthreads); void eio_set_idle_timeout (unsigned int seconds); unsigned int eio_nreqs (void); /* number of requests in-flight */ unsigned int eio_nready (void); /* number of not-yet handled requests */ unsigned int eio_npending (void); /* number of finished but unhandled requests */ unsigned int eio_nthreads (void); /* number of worker threads in use currently */ /*****************************************************************************/ /* convenience wrappers */ #ifndef EIO_NO_WRAPPERS eio_req *eio_wd_open (const char *path, int pri, eio_cb cb, void *data); /* result=wd */ eio_req *eio_wd_close (eio_wd wd, int pri, eio_cb cb, void *data); eio_req *eio_nop (int pri, eio_cb cb, void *data); /* does nothing except go through the whole process */ eio_req *eio_busy (eio_tstamp delay, int pri, eio_cb cb, void *data); /* ties a thread for this long, simulating busyness */ eio_req *eio_sync (int pri, eio_cb cb, void *data); eio_req *eio_fsync (int fd, int pri, eio_cb cb, void *data); eio_req *eio_fdatasync (int fd, int pri, eio_cb cb, void *data); eio_req *eio_syncfs (int fd, int pri, eio_cb cb, void *data); eio_req *eio_msync (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data); eio_req *eio_mtouch (void *addr, size_t length, int flags, int pri, eio_cb cb, void 
*data); eio_req *eio_mlock (void *addr, size_t length, int pri, eio_cb cb, void *data); eio_req *eio_mlockall (int flags, int pri, eio_cb cb, void *data); eio_req *eio_sync_file_range (int fd, off_t offset, size_t nbytes, unsigned int flags, int pri, eio_cb cb, void *data); eio_req *eio_fallocate (int fd, int mode, off_t offset, size_t len, int pri, eio_cb cb, void *data); eio_req *eio_close (int fd, int pri, eio_cb cb, void *data); eio_req *eio_readahead (int fd, off_t offset, size_t length, int pri, eio_cb cb, void *data); eio_req *eio_seek (int fd, off_t offset, int whence, int pri, eio_cb cb, void *data); eio_req *eio_read (int fd, void *buf, size_t length, off_t offset, int pri, eio_cb cb, void *data); eio_req *eio_write (int fd, void *buf, size_t length, off_t offset, int pri, eio_cb cb, void *data); eio_req *eio_fstat (int fd, int pri, eio_cb cb, void *data); /* stat buffer=ptr2 allocated dynamically */ eio_req *eio_fstatvfs (int fd, int pri, eio_cb cb, void *data); /* stat buffer=ptr2 allocated dynamically */ eio_req *eio_futime (int fd, eio_tstamp atime, eio_tstamp mtime, int pri, eio_cb cb, void *data); eio_req *eio_ftruncate (int fd, off_t offset, int pri, eio_cb cb, void *data); eio_req *eio_fchmod (int fd, mode_t mode, int pri, eio_cb cb, void *data); eio_req *eio_fchown (int fd, eio_uid_t uid, eio_gid_t gid, int pri, eio_cb cb, void *data); eio_req *eio_dup2 (int fd, int fd2, int pri, eio_cb cb, void *data); eio_req *eio_sendfile (int out_fd, int in_fd, off_t in_offset, size_t length, int pri, eio_cb cb, void *data); eio_req *eio_open (const char *path, int flags, mode_t mode, int pri, eio_cb cb, void *data); eio_req *eio_utime (const char *path, eio_tstamp atime, eio_tstamp mtime, int pri, eio_cb cb, void *data); eio_req *eio_truncate (const char *path, off_t offset, int pri, eio_cb cb, void *data); eio_req *eio_chown (const char *path, eio_uid_t uid, eio_gid_t gid, int pri, eio_cb cb, void *data); eio_req *eio_chmod (const char *path, mode_t mode, 
int pri, eio_cb cb, void *data); eio_req *eio_mkdir (const char *path, mode_t mode, int pri, eio_cb cb, void *data); eio_req *eio_readdir (const char *path, int flags, int pri, eio_cb cb, void *data); /* result=ptr2 allocated dynamically */ eio_req *eio_rmdir (const char *path, int pri, eio_cb cb, void *data); eio_req *eio_unlink (const char *path, int pri, eio_cb cb, void *data); eio_req *eio_readlink (const char *path, int pri, eio_cb cb, void *data); /* result=ptr2 allocated dynamically */ eio_req *eio_realpath (const char *path, int pri, eio_cb cb, void *data); /* result=ptr2 allocated dynamically */ eio_req *eio_stat (const char *path, int pri, eio_cb cb, void *data); /* stat buffer=ptr2 allocated dynamically */ eio_req *eio_lstat (const char *path, int pri, eio_cb cb, void *data); /* stat buffer=ptr2 allocated dynamically */ eio_req *eio_statvfs (const char *path, int pri, eio_cb cb, void *data); /* stat buffer=ptr2 allocated dynamically */ eio_req *eio_mknod (const char *path, mode_t mode, dev_t dev, int pri, eio_cb cb, void *data); eio_req *eio_link (const char *path, const char *new_path, int pri, eio_cb cb, void *data); eio_req *eio_symlink (const char *path, const char *new_path, int pri, eio_cb cb, void *data); eio_req *eio_rename (const char *path, const char *new_path, int pri, eio_cb cb, void *data); eio_req *eio_custom (void (*execute)(eio_req *), int pri, eio_cb cb, void *data); #endif /*****************************************************************************/ /* groups */ eio_req *eio_grp (eio_cb cb, void *data); void eio_grp_feed (eio_req *grp, void (*feed)(eio_req *req), int limit); void eio_grp_limit (eio_req *grp, int limit); void eio_grp_add (eio_req *grp, eio_req *req); void eio_grp_cancel (eio_req *grp); /* cancels all sub requests but not the group */ /*****************************************************************************/ /* request api */ /* true if the request was cancelled, useful in the invoke callback */ #define 
EIO_CANCELLED(req) ((req)->cancelled) #define EIO_RESULT(req) ((req)->result) /* returns a pointer to the result buffer allocated by eio */ #define EIO_BUF(req) ((req)->ptr2) #define EIO_STAT_BUF(req) ((EIO_STRUCT_STAT *)EIO_BUF(req)) #define EIO_STATVFS_BUF(req) ((EIO_STRUCT_STATVFS *)EIO_BUF(req)) #define EIO_PATH(req) ((char *)(req)->ptr1) /* submit a request for execution */ void eio_submit (eio_req *req); /* cancel a request as fast as possible, if possible */ void eio_cancel (eio_req *req); /*****************************************************************************/ /* convenience functions */ eio_ssize_t eio_sendfile_sync (int ofd, int ifd, off_t offset, size_t count); #ifdef __cplusplus } #endif #endif IO-AIO-4.18/libeio/eio.c0000644000000000000000000021042212035451177013252 0ustar rootroot/* * libeio implementation * * Copyright (c) 2007,2008,2009,2010,2011,2012 Marc Alexander Lehmann * All rights reserved. * * Redistribution and use in source and binary forms, with or without modifica- * tion, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * * Alternatively, the contents of this file may be used under the terms of * the GNU General Public License ("GPL") version 2 or any later version, * in which case the provisions of the GPL are applicable instead of * the above. If you wish to allow the use of your version of this file * only under the terms of the GPL and not to allow others to use your * version of this file under the BSD license, indicate your decision * by deleting the provisions above and replace them with the notice * and other provisions required by the GPL. If you do not delete the * provisions above, a recipient may use your version of this file under * either the BSD or the GPL. */ #ifndef _WIN32 # include "config.h" #endif #include "eio.h" #include "ecb.h" #ifdef EIO_STACKSIZE # define X_STACKSIZE EIO_STACKSIZE #endif #include "xthread.h" #include #include #include #include #include #include #include #include #include #include /* intptr_t comes from unistd.h, says POSIX/UNIX/tradition */ /* intptr_t only comes from stdint.h, says idiot openbsd coder */ #if HAVE_STDINT_H # include #endif #ifndef ECANCELED # define ECANCELED EDOM #endif #ifndef ELOOP # define ELOOP EDOM #endif #if !defined(ENOTSOCK) && defined(WSAENOTSOCK) # define ENOTSOCK WSAENOTSOCK #endif static void eio_destroy (eio_req *req); #ifndef EIO_FINISH # define EIO_FINISH(req) ((req)->finish) && !EIO_CANCELLED (req) ? 
(req)->finish (req) : 0 #endif #ifndef EIO_DESTROY # define EIO_DESTROY(req) do { if ((req)->destroy) (req)->destroy (req); } while (0) #endif #ifndef EIO_FEED # define EIO_FEED(req) do { if ((req)->feed ) (req)->feed (req); } while (0) #endif #ifndef EIO_FD_TO_WIN32_HANDLE # define EIO_FD_TO_WIN32_HANDLE(fd) _get_osfhandle (fd) #endif #ifndef EIO_WIN32_HANDLE_TO_FD # define EIO_WIN32_HANDLE_TO_FD(handle) _open_osfhandle (handle, 0) #endif #define EIO_ERRNO(errval,retval) ((errno = errval), retval) #define EIO_ENOSYS() EIO_ERRNO (ENOSYS, -1) #ifdef _WIN32 #undef PAGESIZE #define PAGESIZE 4096 /* GetSystemInfo? */ /* TODO: look at how perl does stat (non-sloppy), unlink (ro-files), utime, link */ #ifdef EIO_STRUCT_STATI64 /* look at perl's non-sloppy stat */ #define stat(path,buf) _stati64 (path,buf) #define fstat(fd,buf) _fstati64 (fd,buf) #endif #define lstat(path,buf) stat (path,buf) #define fsync(fd) (FlushFileBuffers ((HANDLE)EIO_FD_TO_WIN32_HANDLE (fd)) ? 0 : EIO_ERRNO (EBADF, -1)) #define mkdir(path,mode) _mkdir (path) #define link(old,neu) (CreateHardLink (neu, old, 0) ? 
0 : EIO_ERRNO (ENOENT, -1)) #define chmod(path,mode) _chmod (path, mode) #define dup(fd) _dup (fd) #define dup2(fd1,fd2) _dup2 (fd1, fd2) #define fchmod(fd,mode) EIO_ENOSYS () #define chown(path,uid,gid) EIO_ENOSYS () #define fchown(fd,uid,gid) EIO_ENOSYS () #define truncate(path,offs) EIO_ENOSYS () /* far-miss: SetEndOfFile */ #define ftruncate(fd,offs) EIO_ENOSYS () /* near-miss: SetEndOfFile */ #define mknod(path,mode,dev) EIO_ENOSYS () #define sync() EIO_ENOSYS () #define readlink(path,buf,s) EIO_ENOSYS () #define statvfs(path,buf) EIO_ENOSYS () #define fstatvfs(fd,buf) EIO_ENOSYS () #define pread(fd,buf,count,offset) eio__pread (fd, buf, count, offset) #define pwrite(fd,buf,count,offset) eio__pwrite (fd, buf, count, offset) #if __GNUC__ typedef long long eio_off_t; /* signed for compatibility to msvc */ #else typedef __int64 eio_off_t; /* unsigned not supported by msvc */ #endif static eio_ssize_t eio__pread (int fd, void *buf, eio_ssize_t count, eio_off_t offset) { OVERLAPPED o = { 0 }; DWORD got; o.Offset = offset; o.OffsetHigh = offset >> 32; return ReadFile ((HANDLE)EIO_FD_TO_WIN32_HANDLE (fd), buf, count, &got, &o) ? got : -1; } static eio_ssize_t eio__pwrite (int fd, void *buf, eio_ssize_t count, eio_off_t offset) { OVERLAPPED o = { 0 }; DWORD got; o.Offset = offset; o.OffsetHigh = offset >> 32; return WriteFile ((HANDLE)EIO_FD_TO_WIN32_HANDLE (fd), buf, count, &got, &o) ? 
got : -1; } /* rename() uses MoveFile, which fails to overwrite */ #define rename(old,neu) eio__rename (old, neu) static int eio__rename (const char *old, const char *neu) { if (MoveFileEx (old, neu, MOVEFILE_REPLACE_EXISTING)) return 0; /* should steal _dosmaperr */ switch (GetLastError ()) { case ERROR_FILE_NOT_FOUND: case ERROR_PATH_NOT_FOUND: case ERROR_INVALID_DRIVE: case ERROR_NO_MORE_FILES: case ERROR_BAD_NETPATH: case ERROR_BAD_NET_NAME: case ERROR_BAD_PATHNAME: case ERROR_FILENAME_EXCED_RANGE: errno = ENOENT; break; default: errno = EACCES; break; } return -1; } /* we could even stat and see if it exists */ static int symlink (const char *old, const char *neu) { #if WINVER >= 0x0600 if (CreateSymbolicLink (neu, old, 1)) return 0; if (CreateSymbolicLink (neu, old, 0)) return 0; #endif return EIO_ERRNO (ENOENT, -1); } /* POSIX API only */ #define CreateHardLink(neu,old,flags) 0 #define CreateSymbolicLink(neu,old,flags) 0 struct statvfs { int dummy; }; #define DT_DIR EIO_DT_DIR #define DT_REG EIO_DT_REG #define D_NAME(entp) entp.cFileName #define D_TYPE(entp) (entp.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ? 
DT_DIR : DT_REG) #else #include #include #include #include #include #include #if _POSIX_MEMLOCK || _POSIX_MEMLOCK_RANGE || _POSIX_MAPPED_FILES #include #endif #define D_NAME(entp) entp->d_name /* POSIX_SOURCE is useless on bsd's, and XOPEN_SOURCE is unreliable there, too */ #if __FreeBSD__ || __NetBSD__ || __OpenBSD__ #define _DIRENT_HAVE_D_TYPE /* sigh */ #define D_INO(de) (de)->d_fileno #define D_NAMLEN(de) (de)->d_namlen #elif __linux || defined d_ino || _XOPEN_SOURCE >= 600 #define D_INO(de) (de)->d_ino #endif #ifdef _D_EXACT_NAMLEN #undef D_NAMLEN #define D_NAMLEN(de) _D_EXACT_NAMLEN (de) #endif #ifdef _DIRENT_HAVE_D_TYPE #define D_TYPE(de) (de)->d_type #endif #ifndef EIO_STRUCT_DIRENT #define EIO_STRUCT_DIRENT struct dirent #endif #endif #if HAVE_UTIMES # include #endif #if HAVE_SYS_SYSCALL_H # include #endif #if HAVE_SYS_PRCTL_H # include #endif #if HAVE_SENDFILE # if __linux # include # elif __FreeBSD__ || defined __APPLE__ # include # include # elif __hpux # include # elif __solaris # include # else # error sendfile support requested but not available # endif #endif #ifndef D_TYPE # define D_TYPE(de) 0 #endif #ifndef D_INO # define D_INO(de) 0 #endif #ifndef D_NAMLEN # define D_NAMLEN(entp) strlen (D_NAME (entp)) #endif /* used for struct dirent, AIX doesn't provide it */ #ifndef NAME_MAX # define NAME_MAX 4096 #endif /* used for readlink etc. 
*/ #ifndef PATH_MAX # define PATH_MAX 4096 #endif /* buffer size for various temporary buffers */ #define EIO_BUFSIZE 65536 #define dBUF \ char *eio_buf = malloc (EIO_BUFSIZE); \ errno = ENOMEM; \ if (!eio_buf) \ return -1 #define FUBd \ free (eio_buf) #define EIO_TICKS ((1000000 + 1023) >> 10) /*****************************************************************************/ struct tmpbuf { void *ptr; int len; }; static void * tmpbuf_get (struct tmpbuf *buf, int len) { if (buf->len < len) { free (buf->ptr); buf->ptr = malloc (buf->len = len); } return buf->ptr; } struct tmpbuf; #if _POSIX_VERSION >= 200809L #define HAVE_AT 1 #define WD2FD(wd) ((wd) ? (wd)->fd : AT_FDCWD) #ifndef O_SEARCH #define O_SEARCH O_RDONLY #endif #else #define HAVE_AT 0 static const char *wd_expand (struct tmpbuf *tmpbuf, eio_wd wd, const char *path); #endif struct eio_pwd { #if HAVE_AT int fd; #endif int len; char str[1]; /* actually, a 0-terminated canonical path */ }; /*****************************************************************************/ #define ETP_PRI_MIN EIO_PRI_MIN #define ETP_PRI_MAX EIO_PRI_MAX struct etp_worker; #define ETP_REQ eio_req #define ETP_DESTROY(req) eio_destroy (req) static int eio_finish (eio_req *req); #define ETP_FINISH(req) eio_finish (req) static void eio_execute (struct etp_worker *self, eio_req *req); #define ETP_EXECUTE(wrk,req) eio_execute (wrk,req) /*****************************************************************************/ #define ETP_NUM_PRI (ETP_PRI_MAX - ETP_PRI_MIN + 1) /* calculate time difference in ~1/EIO_TICKS of a second */ ecb_inline int tvdiff (struct timeval *tv1, struct timeval *tv2) { return (tv2->tv_sec - tv1->tv_sec ) * EIO_TICKS + ((tv2->tv_usec - tv1->tv_usec) >> 10); } static unsigned int started, idle, wanted = 4; static void (*want_poll_cb) (void); static void (*done_poll_cb) (void); static unsigned int max_poll_time; /* reslock */ static unsigned int max_poll_reqs; /* reslock */ static unsigned int nreqs; /* reqlock */ static 
unsigned int nready; /* reqlock */ static unsigned int npending; /* reqlock */ static unsigned int max_idle = 4; /* maximum number of threads that can idle indefinitely */ static unsigned int idle_timeout = 10; /* number of seconds after which an idle threads exit */ static xmutex_t wrklock; static xmutex_t reslock; static xmutex_t reqlock; static xcond_t reqwait; typedef struct etp_worker { struct tmpbuf tmpbuf; /* locked by wrklock */ struct etp_worker *prev, *next; xthread_t tid; #ifdef ETP_WORKER_COMMON ETP_WORKER_COMMON #endif } etp_worker; static etp_worker wrk_first; /* NOT etp */ #define ETP_WORKER_LOCK(wrk) X_LOCK (wrklock) #define ETP_WORKER_UNLOCK(wrk) X_UNLOCK (wrklock) /* worker threads management */ static void etp_worker_clear (etp_worker *wrk) { } static void ecb_cold etp_worker_free (etp_worker *wrk) { free (wrk->tmpbuf.ptr); wrk->next->prev = wrk->prev; wrk->prev->next = wrk->next; free (wrk); } static unsigned int etp_nreqs (void) { int retval; if (WORDACCESS_UNSAFE) X_LOCK (reqlock); retval = nreqs; if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock); return retval; } static unsigned int etp_nready (void) { unsigned int retval; if (WORDACCESS_UNSAFE) X_LOCK (reqlock); retval = nready; if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock); return retval; } static unsigned int etp_npending (void) { unsigned int retval; if (WORDACCESS_UNSAFE) X_LOCK (reqlock); retval = npending; if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock); return retval; } static unsigned int etp_nthreads (void) { unsigned int retval; if (WORDACCESS_UNSAFE) X_LOCK (reqlock); retval = started; if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock); return retval; } /* * a somewhat faster data structure might be nice, but * with 8 priorities this actually needs <20 insns * per shift, the most expensive operation. 
*/
/* a request queue: one singly-linked FIFO per priority level (qs = head, qe = tail) */
typedef struct
{
  ETP_REQ *qs[ETP_NUM_PRI], *qe[ETP_NUM_PRI]; /* qstart, qend */
  int size; /* total number of queued requests over all priorities */
} etp_reqq;

static etp_reqq req_queue; /* requests pushed by etp_submit / etp_end_thread */
static etp_reqq res_queue; /* requests that etp_poll shifts off and finishes */

/* reset a queue to the empty state */
static void ecb_noinline ecb_cold
reqq_init (etp_reqq *q)
{
  int pri;

  for (pri = 0; pri < ETP_NUM_PRI; ++pri)
    q->qs[pri] = q->qe[pri] = 0;

  q->size = 0;
}

/* append req to the FIFO selected by req->pri, which is used directly as array index */
/* returns the queue size before the push, i.e. 0 if the queue was empty */
static int ecb_noinline
reqq_push (etp_reqq *q, ETP_REQ *req)
{
  int pri = req->pri;
  req->next = 0;

  if (q->qe[pri])
    {
      q->qe[pri]->next = req;
      q->qe[pri] = req;
    }
  else
    q->qe[pri] = q->qs[pri] = req;

  return q->size++;
}

/* remove and return the oldest request from the highest-indexed non-empty priority, or 0 if the queue is empty */
static ETP_REQ * ecb_noinline
reqq_shift (etp_reqq *q)
{
  int pri;

  if (!q->size)
    return 0;

  --q->size;

  for (pri = ETP_NUM_PRI; pri--; )
    {
      eio_req *req = q->qs[pri];

      if (req)
        {
          /* advance the head; if the list became empty, also clear the tail */
          if (!(q->qs[pri] = (eio_req *)req->next))
            q->qe[pri] = 0;

          return req;
        }
    }

  /* size was non-zero but every per-priority list was empty */
  abort ();
}

/* create the locks and the condition variable, empty both queues and the worker list, */
/* zero all counters and remember the two poll callbacks; always returns 0 */
static int ecb_cold
etp_init (void (*want_poll)(void), void (*done_poll)(void))
{
  X_MUTEX_CREATE (wrklock);
  X_MUTEX_CREATE (reslock);
  X_MUTEX_CREATE (reqlock);
  X_COND_CREATE  (reqwait);

  reqq_init (&req_queue);
  reqq_init (&res_queue);

  /* an empty worker list is the list head pointing at itself */
  wrk_first.next =
  wrk_first.prev = &wrk_first;

  started  = 0;
  idle     = 0;
  nreqs    = 0;
  nready   = 0;
  npending = 0;

  want_poll_cb = want_poll;
  done_poll_cb = done_poll;

  return 0;
}

X_THREAD_PROC (etp_proc);

/* allocate a worker and try to start a thread for it */
/* a true result from xthread_create is treated as success: the worker is linked in */
/* directly after wrk_first and "started" is incremented, otherwise the worker is freed again */
static void ecb_cold
etp_start_thread (void)
{
  etp_worker *wrk = calloc (1, sizeof (etp_worker));

  /*TODO*/
  /* NOTE(review): with NDEBUG the assert vanishes and a failed calloc is dereferenced below */
  assert (("unable to allocate worker thread data", wrk));

  X_LOCK (wrklock);

  if (xthread_create (&wrk->tid, etp_proc, (void *)wrk))
    {
      wrk->prev = &wrk_first;
      wrk->next = wrk_first.next;
      wrk_first.next->prev = wrk;
      wrk_first.next = wrk;
      ++started;
    }
  else
    free (wrk);

  X_UNLOCK (wrklock);
}

/* start another thread unless "wanted" threads already run, or */
/* nthreads + npending - nreqs is already non-negative */
static void
etp_maybe_start_thread (void)
{
  if (ecb_expect_true (etp_nthreads () >= wanted))
    return;

  /* todo: maybe use idle here, but might be less exact */
  if (ecb_expect_true (0 <= (int)etp_nthreads () + (int)etp_npending () - (int)etp_nreqs ()))
    return;

  etp_start_thread ();
}

/* queue a special request of type -1 and decrement "started" */
static void ecb_cold
etp_end_thread (void)
{
  eio_req *req = calloc (1, sizeof (eio_req)); /* will be freed
by worker */

  /* NOTE(review): req is used without checking the calloc result */
  req->type = -1;
  req->pri  = ETP_PRI_MAX - ETP_PRI_MIN; /* highest queue index */

  X_LOCK (reqlock);
  reqq_push (&req_queue, req);
  X_COND_SIGNAL (reqwait);
  X_UNLOCK (reqlock);

  X_LOCK (wrklock);
  --started;
  X_UNLOCK (wrklock);
}

/* take requests off the result queue and finish them */
/* returns 0 once the result queue is empty, the first non-zero ETP_FINISH result, */
/* or -1 with errno = EAGAIN when max_poll_reqs or max_poll_time stopped the loop early */
static int
etp_poll (void)
{
  unsigned int maxreqs;
  unsigned int maxtime;
  struct timeval tv_start, tv_now;

  /* copy the limits once, so they cannot change while we loop */
  X_LOCK (reslock);
  maxreqs = max_poll_reqs;
  maxtime = max_poll_time;
  X_UNLOCK (reslock);

  if (maxtime)
    gettimeofday (&tv_start, 0);

  for (;;)
    {
      ETP_REQ *req;

      etp_maybe_start_thread ();

      X_LOCK (reslock);
      req = reqq_shift (&res_queue);

      if (req)
        {
          --npending;

          /* the result queue just became empty: tell the user */
          if (!res_queue.size && done_poll_cb)
            done_poll_cb ();
        }

      X_UNLOCK (reslock);

      if (!req)
        return 0;

      X_LOCK (reqlock);
      --nreqs;
      X_UNLOCK (reqlock);

      /* a group with a non-zero size is not finished here; grp_dec finishes it later */
      if (ecb_expect_false (req->type == EIO_GROUP && req->size))
        {
          req->int1 = 1; /* mark request as delayed */
          continue;
        }
      else
        {
          int res = ETP_FINISH (req);
          if (ecb_expect_false (res))
            return res;
        }

      /* a maxreqs of 0 means "no limit" */
      if (ecb_expect_false (maxreqs && !--maxreqs))
        break;

      if (maxtime)
        {
          gettimeofday (&tv_now, 0);

          /* tvdiff counts in units of roughly 1/EIO_TICKS seconds, like max_poll_time */
          if (tvdiff (&tv_start, &tv_now) >= maxtime)
            break;
        }
    }

  errno = EAGAIN;
  return -1;
}

/* flag the request as cancelled and cancel the members of its group list */
static void
etp_cancel (ETP_REQ *req)
{
  req->cancelled = 1;

  eio_grp_cancel (req);
}

/* queue a request: the priority is shifted to a 0-based index and clamped to the valid range */
static void
etp_submit (ETP_REQ *req)
{
  req->pri -= ETP_PRI_MIN;

  if (ecb_expect_false (req->pri < ETP_PRI_MIN - ETP_PRI_MIN)) req->pri = ETP_PRI_MIN - ETP_PRI_MIN;
  if (ecb_expect_false (req->pri > ETP_PRI_MAX - ETP_PRI_MIN)) req->pri = ETP_PRI_MAX - ETP_PRI_MIN;

  if (ecb_expect_false (req->type == EIO_GROUP))
    {
      /* groups bypass the request queue and go straight to the result queue */
      /* I hope this is worth it :/ */
      X_LOCK (reqlock);
      ++nreqs;
      X_UNLOCK (reqlock);

      X_LOCK (reslock);

      ++npending;

      /* reqq_push returns the previous size: 0 means the result queue was empty */
      if (!reqq_push (&res_queue, req) && want_poll_cb)
        want_poll_cb ();

      X_UNLOCK (reslock);
    }
  else
    {
      X_LOCK (reqlock);
      ++nreqs;
      ++nready;
      reqq_push (&req_queue, req);
      X_COND_SIGNAL (reqwait);
      X_UNLOCK (reqlock);

      etp_maybe_start_thread ();
    }
}

/* set the poll time limit; seconds are converted to EIO_TICKS units */
static void ecb_cold
etp_set_max_poll_time (double nseconds)
{
  if (WORDACCESS_UNSAFE) X_LOCK   (reslock);
  max_poll_time = nseconds * EIO_TICKS;
  if (WORDACCESS_UNSAFE)
X_UNLOCK (reslock);
}

/* set the maximum number of requests finished per etp_poll call (0 = no limit) */
static void ecb_cold
etp_set_max_poll_reqs (unsigned int maxreqs)
{
  if (WORDACCESS_UNSAFE) X_LOCK   (reslock);
  max_poll_reqs = maxreqs;
  if (WORDACCESS_UNSAFE) X_UNLOCK (reslock);
}

/* set max_idle */
static void ecb_cold
etp_set_max_idle (unsigned int nthreads)
{
  if (WORDACCESS_UNSAFE) X_LOCK   (reqlock);
  max_idle = nthreads;
  if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
}

/* set idle_timeout, in seconds */
static void ecb_cold
etp_set_idle_timeout (unsigned int seconds)
{
  if (WORDACCESS_UNSAFE) X_LOCK   (reqlock);
  idle_timeout = seconds;
  if (WORDACCESS_UNSAFE) X_UNLOCK (reqlock);
}

/* raise "wanted" to at least nthreads; never lowers it */
static void ecb_cold
etp_set_min_parallel (unsigned int nthreads)
{
  if (wanted < nthreads)
    wanted = nthreads;
}

/* lower "wanted" to at most nthreads and end surplus threads */
static void ecb_cold
etp_set_max_parallel (unsigned int nthreads)
{
  if (wanted > nthreads)
    wanted = nthreads;

  /* each etp_end_thread call decrements "started" */
  while (started > wanted)
    etp_end_thread ();
}

/*****************************************************************************/

/* call the group's feed callback while size < int2 and the group is not cancelled */
static void
grp_try_feed (eio_req *grp)
{
  while (grp->size < grp->int2 && !EIO_CANCELLED (grp))
    {
      /* cleared here; NOTE(review): presumably set again when a request is added to the group */
      grp->flags &= ~EIO_FLAG_GROUPADD;

      EIO_FEED (grp);

      /* stop if no progress has been made */
      if (!(grp->flags & EIO_FLAG_GROUPADD))
        {
          grp->feed = 0;
          break;
        }
    }
}

/* one group member is done: decrement the size, feed, and finish the group */
/* once it is empty and has been marked as delayed (int1, set in etp_poll) */
static int
grp_dec (eio_req *grp)
{
  --grp->size;

  /* call feeder, if applicable */
  grp_try_feed (grp);

  /* finish, if done */
  if (!grp->size && grp->int1)
    return eio_finish (grp);
  else
    return 0;
}

/* free ptr1/ptr2 when the corresponding ownership flags are set, then run EIO_DESTROY */
static void
eio_destroy (eio_req *req)
{
  if ((req)->flags & EIO_FLAG_PTR1_FREE) free (req->ptr1);
  if ((req)->flags & EIO_FLAG_PTR2_FREE) free (req->ptr2);

  EIO_DESTROY (req);
}

/* run EIO_FINISH, detach the request from its group (if any), then destroy it */
/* returns the EIO_FINISH result, or, if that was 0, the result of grp_dec on the group */
static int
eio_finish (eio_req *req)
{
  int res = EIO_FINISH (req);

  if (req->grp)
    {
      int res2;
      eio_req *grp = req->grp;

      /* unlink request */
      if (req->grp_next) req->grp_next->grp_prev = req->grp_prev;
      if (req->grp_prev) req->grp_prev->grp_next = req->grp_next;

      if (grp->grp_first == req)
        grp->grp_first = req->grp_next;

      res2 = grp_dec (grp);

      if (!res)
        res = res2;
    }

  eio_destroy (req);

  return res;
}

/* cancel every request on the grp_first list of grp */
void
eio_grp_cancel (eio_req *grp)
{
  for (grp = grp->grp_first; grp; grp =
grp->grp_next) eio_cancel (grp); } void eio_cancel (eio_req *req) { etp_cancel (req); } void eio_submit (eio_req *req) { etp_submit (req); } unsigned int eio_nreqs (void) { return etp_nreqs (); } unsigned int eio_nready (void) { return etp_nready (); } unsigned int eio_npending (void) { return etp_npending (); } unsigned int ecb_cold eio_nthreads (void) { return etp_nthreads (); } void ecb_cold eio_set_max_poll_time (double nseconds) { etp_set_max_poll_time (nseconds); } void ecb_cold eio_set_max_poll_reqs (unsigned int maxreqs) { etp_set_max_poll_reqs (maxreqs); } void ecb_cold eio_set_max_idle (unsigned int nthreads) { etp_set_max_idle (nthreads); } void ecb_cold eio_set_idle_timeout (unsigned int seconds) { etp_set_idle_timeout (seconds); } void ecb_cold eio_set_min_parallel (unsigned int nthreads) { etp_set_min_parallel (nthreads); } void ecb_cold eio_set_max_parallel (unsigned int nthreads) { etp_set_max_parallel (nthreads); } int eio_poll (void) { return etp_poll (); } /*****************************************************************************/ /* work around various missing functions */ #ifndef HAVE_UTIMES # undef utimes # define utimes(path,times) eio__utimes (path, times) static int eio__utimes (const char *filename, const struct timeval times[2]) { if (times) { struct utimbuf buf; buf.actime = times[0].tv_sec; buf.modtime = times[1].tv_sec; return utime (filename, &buf); } else return utime (filename, 0); } #endif #ifndef HAVE_FUTIMES # undef futimes # define futimes(fd,times) eio__futimes (fd, times) static int eio__futimes (int fd, const struct timeval tv[2]) { errno = ENOSYS; return -1; } #endif #if !HAVE_FDATASYNC # undef fdatasync # define fdatasync(fd) fsync (fd) #endif static int eio__syncfs (int fd) { int res; #if HAVE_SYS_SYNCFS res = (int)syscall (__NR_syncfs, (int)(fd)); #else res = EIO_ENOSYS (); #endif if (res < 0 && errno == ENOSYS && fd >= 0) sync (); return res; } /* sync_file_range always needs emulation */ static int 
eio__sync_file_range (int fd, off_t offset, size_t nbytes, unsigned int flags) { #if HAVE_SYNC_FILE_RANGE int res; if (EIO_SYNC_FILE_RANGE_WAIT_BEFORE != SYNC_FILE_RANGE_WAIT_BEFORE || EIO_SYNC_FILE_RANGE_WRITE != SYNC_FILE_RANGE_WRITE || EIO_SYNC_FILE_RANGE_WAIT_AFTER != SYNC_FILE_RANGE_WAIT_AFTER) { flags = 0 | (flags & EIO_SYNC_FILE_RANGE_WAIT_BEFORE ? SYNC_FILE_RANGE_WAIT_BEFORE : 0) | (flags & EIO_SYNC_FILE_RANGE_WRITE ? SYNC_FILE_RANGE_WRITE : 0) | (flags & EIO_SYNC_FILE_RANGE_WAIT_AFTER ? SYNC_FILE_RANGE_WAIT_AFTER : 0); } res = sync_file_range (fd, offset, nbytes, flags); if (!res || errno != ENOSYS) return res; #endif /* even though we could play tricks with the flags, it's better to always * call fdatasync, as that matches the expectation of its users best */ return fdatasync (fd); } static int eio__fallocate (int fd, int mode, off_t offset, size_t len) { #if HAVE_LINUX_FALLOCATE return fallocate (fd, mode, offset, len); #else return EIO_ENOSYS (); #endif } #if !HAVE_READAHEAD # undef readahead # define readahead(fd,offset,count) eio__readahead (fd, offset, count, self) static eio_ssize_t eio__readahead (int fd, off_t offset, size_t count, etp_worker *self) { size_t todo = count; dBUF; while (todo > 0) { size_t len = todo < EIO_BUFSIZE ? todo : EIO_BUFSIZE; pread (fd, eio_buf, len, offset); offset += len; todo -= len; } FUBd; /* linux's readahead basically only fails for EBADF or EINVAL (not mmappable) */ /* but not for e.g. 
EIO or eof, so we also never fail */ return 0; } #endif /* sendfile always needs emulation */ static eio_ssize_t eio__sendfile (int ofd, int ifd, off_t offset, size_t count) { eio_ssize_t written = 0; eio_ssize_t res; if (!count) return 0; for (;;) { #ifdef __APPLE__ # undef HAVE_SENDFILE /* broken, as everything on os x */ #endif #if HAVE_SENDFILE # if __linux off_t soffset = offset; res = sendfile (ofd, ifd, &soffset, count); # elif __FreeBSD__ /* * Of course, the freebsd sendfile is a dire hack with no thoughts * wasted on making it similar to other I/O functions. */ off_t sbytes; res = sendfile (ifd, ofd, offset, count, 0, &sbytes, 0); #if 0 /* according to the manpage, this is correct, but broken behaviour */ /* freebsd' sendfile will return 0 on success */ /* freebsd 8 documents it as only setting *sbytes on EINTR and EAGAIN, but */ /* not on e.g. EIO or EPIPE - sounds broken */ if ((res < 0 && (errno == EAGAIN || errno == EINTR) && sbytes) || res == 0) res = sbytes; #endif /* according to source inspection, this is correct, and useful behaviour */ if (sbytes) res = sbytes; # elif defined __APPLE__ off_t sbytes = count; res = sendfile (ifd, ofd, offset, &sbytes, 0, 0); /* according to the manpage, sbytes is always valid */ if (sbytes) res = sbytes; # elif __hpux res = sendfile (ofd, ifd, offset, count, 0, 0); # elif __solaris struct sendfilevec vec; size_t sbytes; vec.sfv_fd = ifd; vec.sfv_flag = 0; vec.sfv_off = offset; vec.sfv_len = count; res = sendfilev (ofd, &vec, 1, &sbytes); if (res < 0 && sbytes) res = sbytes; # endif #elif defined (_WIN32) && 0 /* does not work, just for documentation of what would need to be done */ /* actually, cannot be done like this, as TransmitFile changes the file offset, */ /* libeio guarantees that the file offset does not change, and windows */ /* has no way to get an independent handle to the same file description */ HANDLE h = TO_SOCKET (ifd); SetFilePointer (h, offset, 0, FILE_BEGIN); res = TransmitFile (TO_SOCKET (ofd), 
h, count, 0, 0, 0, 0); #else res = EIO_ENOSYS (); #endif /* we assume sendfile can copy at least 128mb in one go */ if (res <= 128 * 1024 * 1024) { if (res > 0) written += res; if (written) return written; break; } else { /* if we requested more, then probably the kernel was lazy */ written += res; offset += res; count -= res; if (!count) return written; } } if (res < 0 && (errno == ENOSYS || errno == EINVAL || errno == ENOTSOCK /* BSDs */ #ifdef ENOTSUP /* sigh, if the steenking pile called openbsd would only try to at least compile posix code... */ || errno == ENOTSUP #endif #ifdef EOPNOTSUPP /* windows */ || errno == EOPNOTSUPP /* BSDs */ #endif #if __solaris || errno == EAFNOSUPPORT || errno == EPROTOTYPE #endif ) ) { /* emulate sendfile. this is a major pain in the ass */ dBUF; res = 0; while (count) { eio_ssize_t cnt; cnt = pread (ifd, eio_buf, count > EIO_BUFSIZE ? EIO_BUFSIZE : count, offset); if (cnt <= 0) { if (cnt && !res) res = -1; break; } cnt = write (ofd, eio_buf, cnt); if (cnt <= 0) { if (cnt && !res) res = -1; break; } offset += cnt; res += cnt; count -= cnt; } FUBd; } return res; } #ifdef PAGESIZE # define eio_pagesize() PAGESIZE #else static intptr_t eio_pagesize (void) { static intptr_t page; if (!page) page = sysconf (_SC_PAGESIZE); return page; } #endif static void eio_page_align (void **addr, size_t *length) { intptr_t mask = eio_pagesize () - 1; /* round down addr */ intptr_t adj = mask & (intptr_t)*addr; *addr = (void *)((intptr_t)*addr - adj); *length += adj; /* round up length */ *length = (*length + mask) & ~mask; } #if !_POSIX_MEMLOCK # define eio__mlockall(a) EIO_ENOSYS () #else static int eio__mlockall (int flags) { #if __GLIBC__ == 2 && __GLIBC_MINOR__ <= 7 extern int mallopt (int, int); mallopt (-6, 238); /* http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=473812 */ #endif if (EIO_MCL_CURRENT != MCL_CURRENT || EIO_MCL_FUTURE != MCL_FUTURE) { flags = 0 | (flags & EIO_MCL_CURRENT ? MCL_CURRENT : 0) | (flags & EIO_MCL_FUTURE ? 
MCL_FUTURE : 0); } return mlockall (flags); } #endif #if !_POSIX_MEMLOCK_RANGE # define eio__mlock(a,b) EIO_ENOSYS () #else static int eio__mlock (void *addr, size_t length) { eio_page_align (&addr, &length); return mlock (addr, length); } #endif #if !(_POSIX_MAPPED_FILES && _POSIX_SYNCHRONIZED_IO) # define eio__msync(a,b,c) EIO_ENOSYS () #else static int eio__msync (void *mem, size_t len, int flags) { eio_page_align (&mem, &len); if (EIO_MS_ASYNC != MS_SYNC || EIO_MS_INVALIDATE != MS_INVALIDATE || EIO_MS_SYNC != MS_SYNC) { flags = 0 | (flags & EIO_MS_ASYNC ? MS_ASYNC : 0) | (flags & EIO_MS_INVALIDATE ? MS_INVALIDATE : 0) | (flags & EIO_MS_SYNC ? MS_SYNC : 0); } return msync (mem, len, flags); } #endif static int eio__mtouch (eio_req *req) { void *mem = req->ptr2; size_t len = req->size; int flags = req->int1; eio_page_align (&mem, &len); { intptr_t addr = (intptr_t)mem; intptr_t end = addr + len; intptr_t page = eio_pagesize (); if (addr < end) if (flags & EIO_MT_MODIFY) /* modify */ do { *((volatile sig_atomic_t *)addr) |= 0; } while ((addr += page) < len && !EIO_CANCELLED (req)); else do { *((volatile sig_atomic_t *)addr) ; } while ((addr += page) < len && !EIO_CANCELLED (req)); } return 0; } /*****************************************************************************/ /* requests implemented outside eio_execute, because they are so large */ static void eio__lseek (eio_req *req) { /* this usually gets optimised away completely, or your compiler sucks, */ /* or the whence constants really are not 0, 1, 2 */ int whence = req->int2 == EIO_SEEK_SET ? SEEK_SET : req->int2 == EIO_SEEK_CUR ? SEEK_CUR : req->int2 == EIO_SEEK_END ? SEEK_END : req->int2; req->offs = lseek (req->int1, req->offs, whence); req->result = req->offs == (off_t)-1 ? 
-1 : 0; } /* result will always end up in tmpbuf, there is always space for adding a 0-byte */ static int eio__realpath (struct tmpbuf *tmpbuf, eio_wd wd, const char *path) { const char *rel = path; char *res; char *tmp1, *tmp2; #if SYMLOOP_MAX > 32 int symlinks = SYMLOOP_MAX; #else int symlinks = 32; #endif errno = EINVAL; if (!rel) return -1; errno = ENOENT; if (!*rel) return -1; res = tmpbuf_get (tmpbuf, PATH_MAX * 3); tmp1 = res + PATH_MAX; tmp2 = tmp1 + PATH_MAX; #if 0 /* disabled, the musl way to do things is just too racy */ #if __linux && defined(O_NONBLOCK) && defined(O_NOATIME) /* on linux we may be able to ask the kernel */ { int fd = open (rel, O_RDONLY | O_NONBLOCK | O_NOCTTY | O_NOATIME); if (fd >= 0) { sprintf (tmp1, "/proc/self/fd/%d", fd); req->result = readlink (tmp1, res, PATH_MAX); close (fd); /* here we should probably stat the open file and the disk file, to make sure they still match */ if (req->result > 0) goto done; } else if (errno == ELOOP || errno == ENAMETOOLONG || errno == ENOENT || errno == ENOTDIR || errno == EIO) return; } #endif #endif if (*rel != '/') { int len; errno = ENOENT; if (wd == EIO_INVALID_WD) return -1; if (wd == EIO_CWD) { if (!getcwd (res, PATH_MAX)) return -1; len = strlen (res); } else memcpy (res, wd->str, len = wd->len); if (res [1]) /* only use if not / */ res += len; } while (*rel) { eio_ssize_t len, linklen; const char *beg = rel; while (*rel && *rel != '/') ++rel; len = rel - beg; if (!len) /* skip slashes */ { ++rel; continue; } if (beg [0] == '.') { if (len == 1) continue; /* . - nop */ if (beg [1] == '.' && len == 2) { /* .. 
- back up one component, if possible */ while (res != tmpbuf->ptr) if (*--res == '/') break; continue; } } errno = ENAMETOOLONG; if (res + 1 + len + 1 >= tmp1) return -1; /* copy one component */ *res = '/'; memcpy (res + 1, beg, len); /* zero-terminate, for readlink */ res [len + 1] = 0; /* now check if it's a symlink */ linklen = readlink (tmpbuf->ptr, tmp1, PATH_MAX); if (linklen < 0) { if (errno != EINVAL) return -1; /* it's a normal directory. hopefully */ res += len + 1; } else { /* yay, it was a symlink - build new path in tmp2 */ int rellen = strlen (rel); errno = ENAMETOOLONG; if (linklen + 1 + rellen >= PATH_MAX) return -1; errno = ELOOP; if (!--symlinks) return -1; if (*tmp1 == '/') res = tmpbuf->ptr; /* symlink resolves to an absolute path */ /* we need to be careful, as rel might point into tmp2 already */ memmove (tmp2 + linklen + 1, rel, rellen + 1); tmp2 [linklen] = '/'; memcpy (tmp2, tmp1, linklen); rel = tmp2; } } /* special case for the lone root path */ if (res == tmpbuf->ptr) *res++ = '/'; return res - (char *)tmpbuf->ptr; } static signed char eio_dent_cmp (const eio_dirent *a, const eio_dirent *b) { return a->score - b->score ? a->score - b->score /* works because our signed char is always 0..100 */ : a->inode < b->inode ? -1 : a->inode > b->inode ? 
1 : 0; } #define EIO_DENT_CMP(i,op,j) eio_dent_cmp (&i, &j) op 0 #define EIO_SORT_CUTOFF 30 /* quite high, but performs well on many filesystems */ #define EIO_SORT_FAST 60 /* when to only use insertion sort */ static void eio_dent_radix_sort (eio_dirent *dents, int size, signed char score_bits, eio_ino_t inode_bits) { unsigned char bits [9 + sizeof (eio_ino_t) * 8]; unsigned char *bit = bits; assert (CHAR_BIT == 8); assert (sizeof (eio_dirent) * 8 < 256); assert (offsetof (eio_dirent, inode)); /* we use bit #0 as sentinel */ assert (offsetof (eio_dirent, score)); /* we use bit #0 as sentinel */ if (size <= EIO_SORT_FAST) return; /* first prepare an array of bits to test in our radix sort */ /* try to take endianness into account, as well as differences in eio_ino_t sizes */ /* inode_bits must contain all inodes ORed together */ /* which is used to skip bits that are 0 everywhere, which is very common */ { eio_ino_t endianness; int i, j; /* we store the byte offset of byte n into byte n of "endianness" */ for (i = 0; i < sizeof (eio_ino_t); ++i) ((unsigned char *)&endianness)[i] = i; *bit++ = 0; for (i = 0; i < sizeof (eio_ino_t); ++i) { /* shifting off the byte offsets out of "endianness" */ int offs = (offsetof (eio_dirent, inode) + (endianness & 0xff)) * 8; endianness >>= 8; for (j = 0; j < 8; ++j) if (inode_bits & (((eio_ino_t)1) << (i * 8 + j))) *bit++ = offs + j; } for (j = 0; j < 8; ++j) if (score_bits & (1 << j)) *bit++ = offsetof (eio_dirent, score) * 8 + j; } /* now actually do the sorting (a variant of MSD radix sort) */ { eio_dirent *base_stk [9 + sizeof (eio_ino_t) * 8], *base; eio_dirent *end_stk [9 + sizeof (eio_ino_t) * 8], *end; unsigned char *bit_stk [9 + sizeof (eio_ino_t) * 8]; int stk_idx = 0; base_stk [stk_idx] = dents; end_stk [stk_idx] = dents + size; bit_stk [stk_idx] = bit - 1; do { base = base_stk [stk_idx]; end = end_stk [stk_idx]; bit = bit_stk [stk_idx]; for (;;) { unsigned char O = *bit >> 3; unsigned char M = 1 << (*bit & 7); 
eio_dirent *a = base; eio_dirent *b = end; if (b - a < EIO_SORT_CUTOFF) break; /* now bit-partition the array on the bit */ /* this ugly asymmetric loop seems to perform much better than typical */ /* partition algos found in the literature */ do if (!(((unsigned char *)a)[O] & M)) ++a; else if (!(((unsigned char *)--b)[O] & M)) { eio_dirent tmp = *a; *a = *b; *b = tmp; ++a; } while (b > a); /* next bit, or stop, if no bits left in this path */ if (!*--bit) break; base_stk [stk_idx] = a; end_stk [stk_idx] = end; bit_stk [stk_idx] = bit; ++stk_idx; end = a; } } while (stk_idx--); } } static void eio_dent_insertion_sort (eio_dirent *dents, int size) { /* first move the smallest element to the front, to act as a sentinel */ { int i; eio_dirent *min = dents; /* the radix pre-pass ensures that the minimum element is in the first EIO_SORT_CUTOFF + 1 elements */ for (i = size > EIO_SORT_FAST ? EIO_SORT_CUTOFF + 1 : size; --i; ) if (EIO_DENT_CMP (dents [i], <, *min)) min = &dents [i]; /* swap elements 0 and j (minimum) */ { eio_dirent tmp = *dents; *dents = *min; *min = tmp; } } /* then do standard insertion sort, assuming that all elements are >= dents [0] */ { eio_dirent *i, *j; for (i = dents + 1; i < dents + size; ++i) { eio_dirent value = *i; for (j = i - 1; EIO_DENT_CMP (*j, >, value); --j) j [1] = j [0]; j [1] = value; } } } static void eio_dent_sort (eio_dirent *dents, int size, signed char score_bits, eio_ino_t inode_bits) { if (size <= 1) return; /* our insertion sort relies on size > 0 */ /* first we use a radix sort, but only for dirs >= EIO_SORT_FAST */ /* and stop sorting when the partitions are <= EIO_SORT_CUTOFF */ eio_dent_radix_sort (dents, size, score_bits, inode_bits); /* use an insertion sort at the end, or for small arrays, */ /* as insertion sort is more efficient for small partitions */ eio_dent_insertion_sort (dents, size); } /* read a full directory */ static void eio__scandir (eio_req *req, etp_worker *self) { char *name, *names; int namesalloc = 
4096 - sizeof (void *) * 4; int namesoffs = 0; int flags = req->int1; eio_dirent *dents = 0; int dentalloc = 128; int dentoffs = 0; eio_ino_t inode_bits = 0; #ifdef _WIN32 HANDLE dirp; WIN32_FIND_DATA entp; #else DIR *dirp; EIO_STRUCT_DIRENT *entp; #endif req->result = -1; if (!(flags & EIO_READDIR_DENTS)) flags &= ~(EIO_READDIR_DIRS_FIRST | EIO_READDIR_STAT_ORDER); #ifdef _WIN32 { int len = strlen ((const char *)req->ptr1); char *path = malloc (MAX_PATH); const char *fmt; const char *reqpath = wd_expand (&self->tmpbuf, req->wd, req->ptr1); if (!len) fmt = "./*"; else if (reqpath[len - 1] == '/' || reqpath[len - 1] == '\\') fmt = "%s*"; else fmt = "%s/*"; _snprintf (path, MAX_PATH, fmt, reqpath); dirp = FindFirstFile (path, &entp); free (path); if (dirp == INVALID_HANDLE_VALUE) { /* should steal _dosmaperr */ switch (GetLastError ()) { case ERROR_FILE_NOT_FOUND: req->result = 0; break; case ERROR_INVALID_NAME: case ERROR_PATH_NOT_FOUND: case ERROR_NO_MORE_FILES: errno = ENOENT; break; case ERROR_NOT_ENOUGH_MEMORY: errno = ENOMEM; break; default: errno = EINVAL; break; } return; } } #else #if HAVE_AT if (req->wd) { int fd = openat (WD2FD (req->wd), req->ptr1, O_CLOEXEC | O_SEARCH | O_DIRECTORY); if (fd < 0) return; dirp = fdopendir (fd); if (!dirp) close (fd); } else dirp = opendir (req->ptr1); #else dirp = opendir (wd_expand (&self->tmpbuf, req->wd, req->ptr1)); #endif if (!dirp) return; #endif if (req->flags & EIO_FLAG_PTR1_FREE) free (req->ptr1); req->flags |= EIO_FLAG_PTR1_FREE | EIO_FLAG_PTR2_FREE; req->ptr1 = dents = flags ? malloc (dentalloc * sizeof (eio_dirent)) : 0; req->ptr2 = names = malloc (namesalloc); if (!names || (flags && !dents)) return; for (;;) { int done; #ifdef _WIN32 done = !dirp; #else errno = 0; entp = readdir (dirp); done = !entp; #endif if (done) { #ifndef _WIN32 int old_errno = errno; closedir (dirp); errno = old_errno; if (errno) break; #endif /* sort etc. 
*/
          /* publish the final flags and the number of collected entries */
          req->int1   = flags;
          req->result = dentoffs;

          if (flags & EIO_READDIR_STAT_ORDER)
            eio_dent_sort (dents, dentoffs, flags & EIO_READDIR_DIRS_FIRST ? 7 : 0, inode_bits);
          else if (flags & EIO_READDIR_DIRS_FIRST)
            if (flags & EIO_READDIR_FOUND_UNKNOWN)
              eio_dent_sort (dents, dentoffs, 7, inode_bits); /* sort by score and inode */
            else
              {
                /* in this case, all is known, and we just put dirs first and sort them */
                eio_dirent *oth = dents + dentoffs;
                eio_dirent *dir = dents;

                /* now partition dirs to the front, and non-dirs to the back */
                /* by walking from both sides and swapping if necessary */
                while (oth > dir)
                  {
                    if (dir->type == EIO_DT_DIR)
                      ++dir;
                    else if ((--oth)->type == EIO_DT_DIR)
                      {
                        eio_dirent tmp = *dir; *dir = *oth; *oth = tmp;

                        ++dir;
                      }
                  }

                /* now sort the dirs only (dirs all have the same score) */
                eio_dent_sort (dents, dir - dents, 0, inode_bits);
              }

          /* leave the read loop, the scan is complete */
          break;
        }

      /* now add the entry to our list(s) */
      name = D_NAME (entp);

      /* skip . and .. entries */
      if (name [0] != '.' || (name [1] && (name [1] != '.'
|| name [2]))) { int len = D_NAMLEN (entp) + 1; while (ecb_expect_false (namesoffs + len > namesalloc)) { namesalloc *= 2; req->ptr2 = names = realloc (names, namesalloc); if (!names) break; } memcpy (names + namesoffs, name, len); if (dents) { struct eio_dirent *ent; if (ecb_expect_false (dentoffs == dentalloc)) { dentalloc *= 2; req->ptr1 = dents = realloc (dents, dentalloc * sizeof (eio_dirent)); if (!dents) break; } ent = dents + dentoffs; ent->nameofs = namesoffs; /* rather dirtily we store the offset in the pointer */ ent->namelen = len - 1; ent->inode = D_INO (entp); inode_bits |= ent->inode; switch (D_TYPE (entp)) { default: ent->type = EIO_DT_UNKNOWN; flags |= EIO_READDIR_FOUND_UNKNOWN; break; #ifdef DT_FIFO case DT_FIFO: ent->type = EIO_DT_FIFO; break; #endif #ifdef DT_CHR case DT_CHR: ent->type = EIO_DT_CHR; break; #endif #ifdef DT_MPC case DT_MPC: ent->type = EIO_DT_MPC; break; #endif #ifdef DT_DIR case DT_DIR: ent->type = EIO_DT_DIR; break; #endif #ifdef DT_NAM case DT_NAM: ent->type = EIO_DT_NAM; break; #endif #ifdef DT_BLK case DT_BLK: ent->type = EIO_DT_BLK; break; #endif #ifdef DT_MPB case DT_MPB: ent->type = EIO_DT_MPB; break; #endif #ifdef DT_REG case DT_REG: ent->type = EIO_DT_REG; break; #endif #ifdef DT_NWK case DT_NWK: ent->type = EIO_DT_NWK; break; #endif #ifdef DT_CMP case DT_CMP: ent->type = EIO_DT_CMP; break; #endif #ifdef DT_LNK case DT_LNK: ent->type = EIO_DT_LNK; break; #endif #ifdef DT_SOCK case DT_SOCK: ent->type = EIO_DT_SOCK; break; #endif #ifdef DT_DOOR case DT_DOOR: ent->type = EIO_DT_DOOR; break; #endif #ifdef DT_WHT case DT_WHT: ent->type = EIO_DT_WHT; break; #endif } ent->score = 7; if (flags & EIO_READDIR_DIRS_FIRST) { if (ent->type == EIO_DT_UNKNOWN) { if (*name == '.') /* leading dots are likely directories, and, in any case, rare */ ent->score = 1; else if (!strchr (name, '.')) /* absence of dots indicate likely dirs */ ent->score = len <= 2 ? 4 - len : len <= 4 ? 4 : len <= 7 ? 
5 : 6; /* shorter == more likely dir, but avoid too many classes */ } else if (ent->type == EIO_DT_DIR) ent->score = 0; } } namesoffs += len; ++dentoffs; } if (EIO_CANCELLED (req)) { errno = ECANCELED; break; } #ifdef _WIN32 if (!FindNextFile (dirp, &entp)) { FindClose (dirp); dirp = 0; } #endif } } /*****************************************************************************/ /* working directory stuff */ /* various deficiencies in the posix 2008 api force us to */ /* keep the absolute path in string form at all times */ /* fuck yeah. */ #if !HAVE_AT /* a bit like realpath, but usually faster because it doesn'T have to return */ /* an absolute or canonical path */ static const char * wd_expand (struct tmpbuf *tmpbuf, eio_wd wd, const char *path) { if (!wd || *path == '/') return path; if (path [0] == '.' && !path [1]) return wd->str; { int l1 = wd->len; int l2 = strlen (path); char *res = tmpbuf_get (tmpbuf, l1 + l2 + 2); memcpy (res, wd->str, l1); res [l1] = '/'; memcpy (res + l1 + 1, path, l2 + 1); return res; } } #endif static eio_wd eio__wd_open_sync (struct tmpbuf *tmpbuf, eio_wd wd, const char *path) { int fd; eio_wd res; int len = eio__realpath (tmpbuf, wd, path); if (len < 0) return EIO_INVALID_WD; #if HAVE_AT fd = openat (WD2FD (wd), path, O_CLOEXEC | O_SEARCH | O_DIRECTORY); if (fd < 0) return EIO_INVALID_WD; #endif res = malloc (sizeof (*res) + len); /* one extra 0-byte */ #if HAVE_AT res->fd = fd; #endif res->len = len; memcpy (res->str, tmpbuf->ptr, len); res->str [len] = 0; return res; } eio_wd eio_wd_open_sync (eio_wd wd, const char *path) { struct tmpbuf tmpbuf = { 0 }; wd = eio__wd_open_sync (&tmpbuf, wd, path); free (tmpbuf.ptr); return wd; } void eio_wd_close_sync (eio_wd wd) { if (wd != EIO_INVALID_WD && wd != EIO_CWD) { #if HAVE_AT close (wd->fd); #endif free (wd); } } #if HAVE_AT /* they forgot these */ static int eio__truncateat (int dirfd, const char *path, off_t length) { int fd = openat (dirfd, path, O_WRONLY | O_CLOEXEC); int res; if 
(fd < 0) return fd; res = ftruncate (fd, length); close (fd); return res; } static int eio__statvfsat (int dirfd, const char *path, struct statvfs *buf) { int fd = openat (dirfd, path, O_SEARCH | O_CLOEXEC); int res; if (fd < 0) return fd; res = fstatvfs (fd, buf); close (fd); return res; } #endif /*****************************************************************************/ #define ALLOC(len) \ if (!req->ptr2) \ { \ X_LOCK (wrklock); \ req->flags |= EIO_FLAG_PTR2_FREE; \ X_UNLOCK (wrklock); \ req->ptr2 = malloc (len); \ if (!req->ptr2) \ { \ errno = ENOMEM; \ req->result = -1; \ break; \ } \ } static void ecb_noinline ecb_cold etp_proc_init (void) { #if HAVE_PRCTL_SET_NAME /* provide a more sensible "thread name" */ char name[16 + 1]; const int namelen = sizeof (name) - 1; int len; prctl (PR_GET_NAME, (unsigned long)name, 0, 0, 0); name [namelen] = 0; len = strlen (name); strcpy (name + (len <= namelen - 4 ? len : namelen - 4), "/eio"); prctl (PR_SET_NAME, (unsigned long)name, 0, 0, 0); #endif } X_THREAD_PROC (etp_proc) { ETP_REQ *req; struct timespec ts; etp_worker *self = (etp_worker *)thr_arg; etp_proc_init (); /* try to distribute timeouts somewhat evenly */ ts.tv_nsec = ((unsigned long)self & 1023UL) * (1000000000UL / 1024UL); for (;;) { ts.tv_sec = 0; X_LOCK (reqlock); for (;;) { req = reqq_shift (&req_queue); if (req) break; if (ts.tv_sec == 1) /* no request, but timeout detected, let's quit */ { X_UNLOCK (reqlock); X_LOCK (wrklock); --started; X_UNLOCK (wrklock); goto quit; } ++idle; if (idle <= max_idle) /* we are allowed to idle, so do so without any timeout */ X_COND_WAIT (reqwait, reqlock); else { /* initialise timeout once */ if (!ts.tv_sec) ts.tv_sec = time (0) + idle_timeout; if (X_COND_TIMEDWAIT (reqwait, reqlock, ts) == ETIMEDOUT) ts.tv_sec = 1; /* assuming this is not a value computed above.,.. 
*/ } --idle; } --nready; X_UNLOCK (reqlock); if (req->type < 0) goto quit; ETP_EXECUTE (self, req); X_LOCK (reslock); ++npending; if (!reqq_push (&res_queue, req) && want_poll_cb) want_poll_cb (); etp_worker_clear (self); X_UNLOCK (reslock); } quit: free (req); X_LOCK (wrklock); etp_worker_free (self); X_UNLOCK (wrklock); return 0; } /*****************************************************************************/ int ecb_cold eio_init (void (*want_poll)(void), void (*done_poll)(void)) { return etp_init (want_poll, done_poll); } ecb_inline void eio_api_destroy (eio_req *req) { free (req); } #define REQ(rtype) \ eio_req *req; \ \ req = (eio_req *)calloc (1, sizeof *req); \ if (!req) \ return 0; \ \ req->type = rtype; \ req->pri = pri; \ req->finish = cb; \ req->data = data; \ req->destroy = eio_api_destroy; #define SEND eio_submit (req); return req #define PATH \ req->flags |= EIO_FLAG_PTR1_FREE; \ req->ptr1 = strdup (path); \ if (!req->ptr1) \ { \ eio_api_destroy (req); \ return 0; \ } static void eio_execute (etp_worker *self, eio_req *req) { #if HAVE_AT int dirfd; #else const char *path; #endif if (ecb_expect_false (EIO_CANCELLED (req))) { req->result = -1; req->errorno = ECANCELED; return; } if (ecb_expect_false (req->wd == EIO_INVALID_WD)) { req->result = -1; req->errorno = ENOENT; return; } if (req->type >= EIO_OPEN) { #if HAVE_AT dirfd = WD2FD (req->wd); #else path = wd_expand (&self->tmpbuf, req->wd, req->ptr1); #endif } switch (req->type) { case EIO_WD_OPEN: req->wd = eio__wd_open_sync (&self->tmpbuf, req->wd, req->ptr1); req->result = req->wd == EIO_INVALID_WD ? -1 : 0; break; case EIO_WD_CLOSE: req->result = 0; eio_wd_close_sync (req->wd); break; case EIO_SEEK: eio__lseek (req); break; case EIO_READ: ALLOC (req->size); req->result = req->offs >= 0 ? pread (req->int1, req->ptr2, req->size, req->offs) : read (req->int1, req->ptr2, req->size); break; case EIO_WRITE: req->result = req->offs >= 0 ? 
pwrite (req->int1, req->ptr2, req->size, req->offs) : write (req->int1, req->ptr2, req->size); break; case EIO_READAHEAD: req->result = readahead (req->int1, req->offs, req->size); break; case EIO_SENDFILE: req->result = eio__sendfile (req->int1, req->int2, req->offs, req->size); break; #if HAVE_AT case EIO_STAT: ALLOC (sizeof (EIO_STRUCT_STAT)); req->result = fstatat (dirfd, req->ptr1, (EIO_STRUCT_STAT *)req->ptr2, 0); break; case EIO_LSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); req->result = fstatat (dirfd, req->ptr1, (EIO_STRUCT_STAT *)req->ptr2, AT_SYMLINK_NOFOLLOW); break; case EIO_CHOWN: req->result = fchownat (dirfd, req->ptr1, req->int2, req->int3, 0); break; case EIO_CHMOD: req->result = fchmodat (dirfd, req->ptr1, (mode_t)req->int2, 0); break; case EIO_TRUNCATE: req->result = eio__truncateat (dirfd, req->ptr1, req->offs); break; case EIO_OPEN: req->result = openat (dirfd, req->ptr1, req->int1, (mode_t)req->int2); break; case EIO_UNLINK: req->result = unlinkat (dirfd, req->ptr1, 0); break; case EIO_RMDIR: req->result = unlinkat (dirfd, req->ptr1, AT_REMOVEDIR); break; case EIO_MKDIR: req->result = mkdirat (dirfd, req->ptr1, (mode_t)req->int2); break; case EIO_RENAME: req->result = renameat (dirfd, req->ptr1, WD2FD ((eio_wd)req->int3), req->ptr2); break; case EIO_LINK: req->result = linkat (dirfd, req->ptr1, WD2FD ((eio_wd)req->int3), req->ptr2, 0); break; case EIO_SYMLINK: req->result = symlinkat (req->ptr1, dirfd, req->ptr2); break; case EIO_MKNOD: req->result = mknodat (dirfd, req->ptr1, (mode_t)req->int2, (dev_t)req->offs); break; case EIO_READLINK: ALLOC (PATH_MAX); req->result = readlinkat (dirfd, req->ptr1, req->ptr2, PATH_MAX); break; case EIO_STATVFS: ALLOC (sizeof (EIO_STRUCT_STATVFS)); req->result = eio__statvfsat (dirfd, req->ptr1, (EIO_STRUCT_STATVFS *)req->ptr2); break; case EIO_UTIME: case EIO_FUTIME: { struct timespec ts[2]; struct timespec *times; if (req->nv1 != -1. || req->nv2 != -1.) 
{ ts[0].tv_sec = req->nv1; ts[0].tv_nsec = (req->nv1 - ts[0].tv_sec) * 1e9; ts[1].tv_sec = req->nv2; ts[1].tv_nsec = (req->nv2 - ts[1].tv_sec) * 1e9; times = ts; } else times = 0; req->result = req->type == EIO_FUTIME ? futimens (req->int1, times) : utimensat (dirfd, req->ptr1, times, 0); } break; #else case EIO_STAT: ALLOC (sizeof (EIO_STRUCT_STAT)); req->result = stat (path , (EIO_STRUCT_STAT *)req->ptr2); break; case EIO_LSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); req->result = lstat (path , (EIO_STRUCT_STAT *)req->ptr2); break; case EIO_CHOWN: req->result = chown (path , req->int2, req->int3); break; case EIO_CHMOD: req->result = chmod (path , (mode_t)req->int2); break; case EIO_TRUNCATE: req->result = truncate (path , req->offs); break; case EIO_OPEN: req->result = open (path , req->int1, (mode_t)req->int2); break; case EIO_UNLINK: req->result = unlink (path ); break; case EIO_RMDIR: req->result = rmdir (path ); break; case EIO_MKDIR: req->result = mkdir (path , (mode_t)req->int2); break; case EIO_RENAME: req->result = rename (path , req->ptr2); break; case EIO_LINK: req->result = link (path , req->ptr2); break; case EIO_SYMLINK: req->result = symlink (path , req->ptr2); break; case EIO_MKNOD: req->result = mknod (path , (mode_t)req->int2, (dev_t)req->offs); break; case EIO_READLINK: ALLOC (PATH_MAX); req->result = readlink (path, req->ptr2, PATH_MAX); break; case EIO_STATVFS: ALLOC (sizeof (EIO_STRUCT_STATVFS)); req->result = statvfs (path , (EIO_STRUCT_STATVFS *)req->ptr2); break; case EIO_UTIME: case EIO_FUTIME: { struct timeval tv[2]; struct timeval *times; if (req->nv1 != -1. || req->nv2 != -1.) { tv[0].tv_sec = req->nv1; tv[0].tv_usec = (req->nv1 - tv[0].tv_sec) * 1e6; tv[1].tv_sec = req->nv2; tv[1].tv_usec = (req->nv2 - tv[1].tv_sec) * 1e6; times = tv; } else times = 0; req->result = req->type == EIO_FUTIME ? 
futimes (req->int1, times) : utimes (req->ptr1, times); } break; #endif case EIO_REALPATH: if (0 <= (req->result = eio__realpath (&self->tmpbuf, req->wd, req->ptr1))) { ALLOC (req->result); memcpy (req->ptr2, self->tmpbuf.ptr, req->result); } break; case EIO_FSTAT: ALLOC (sizeof (EIO_STRUCT_STAT)); req->result = fstat (req->int1, (EIO_STRUCT_STAT *)req->ptr2); break; case EIO_FSTATVFS: ALLOC (sizeof (EIO_STRUCT_STATVFS)); req->result = fstatvfs (req->int1, (EIO_STRUCT_STATVFS *)req->ptr2); break; case EIO_FCHOWN: req->result = fchown (req->int1, req->int2, req->int3); break; case EIO_FCHMOD: req->result = fchmod (req->int1, (mode_t)req->int2); break; case EIO_FTRUNCATE: req->result = ftruncate (req->int1, req->offs); break; case EIO_CLOSE: req->result = close (req->int1); break; case EIO_DUP2: req->result = dup2 (req->int1, req->int2); break; case EIO_SYNC: req->result = 0; sync (); break; case EIO_FSYNC: req->result = fsync (req->int1); break; case EIO_FDATASYNC: req->result = fdatasync (req->int1); break; case EIO_SYNCFS: req->result = eio__syncfs (req->int1); break; case EIO_SYNC_FILE_RANGE: req->result = eio__sync_file_range (req->int1, req->offs, req->size, req->int2); break; case EIO_MSYNC: req->result = eio__msync (req->ptr2, req->size, req->int1); break; case EIO_MTOUCH: req->result = eio__mtouch (req); break; case EIO_MLOCK: req->result = eio__mlock (req->ptr2, req->size); break; case EIO_MLOCKALL: req->result = eio__mlockall (req->int1); break; case EIO_FALLOCATE: req->result = eio__fallocate (req->int1, req->int2, req->offs, req->size); break; case EIO_READDIR: eio__scandir (req, self); break; case EIO_BUSY: #ifdef _WIN32 Sleep (req->nv1 * 1e3); #else { struct timeval tv; tv.tv_sec = req->nv1; tv.tv_usec = (req->nv1 - tv.tv_sec) * 1e6; req->result = select (0, 0, 0, 0, &tv); } #endif break; case EIO_GROUP: abort (); /* handled in eio_request */ case EIO_NOP: req->result = 0; break; case EIO_CUSTOM: req->feed (req); break; default: req->result = 
EIO_ENOSYS (); break; } req->errorno = errno; } #ifndef EIO_NO_WRAPPERS eio_req *eio_wd_open (const char *path, int pri, eio_cb cb, void *data) { REQ (EIO_WD_OPEN); PATH; SEND; } eio_req *eio_wd_close (eio_wd wd, int pri, eio_cb cb, void *data) { REQ (EIO_WD_CLOSE); req->wd = wd; SEND; } eio_req *eio_nop (int pri, eio_cb cb, void *data) { REQ (EIO_NOP); SEND; } eio_req *eio_busy (double delay, int pri, eio_cb cb, void *data) { REQ (EIO_BUSY); req->nv1 = delay; SEND; } eio_req *eio_sync (int pri, eio_cb cb, void *data) { REQ (EIO_SYNC); SEND; } eio_req *eio_fsync (int fd, int pri, eio_cb cb, void *data) { REQ (EIO_FSYNC); req->int1 = fd; SEND; } eio_req *eio_msync (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data) { REQ (EIO_MSYNC); req->ptr2 = addr; req->size = length; req->int1 = flags; SEND; } eio_req *eio_fdatasync (int fd, int pri, eio_cb cb, void *data) { REQ (EIO_FDATASYNC); req->int1 = fd; SEND; } eio_req *eio_syncfs (int fd, int pri, eio_cb cb, void *data) { REQ (EIO_SYNCFS); req->int1 = fd; SEND; } eio_req *eio_sync_file_range (int fd, off_t offset, size_t nbytes, unsigned int flags, int pri, eio_cb cb, void *data) { REQ (EIO_SYNC_FILE_RANGE); req->int1 = fd; req->offs = offset; req->size = nbytes; req->int2 = flags; SEND; } eio_req *eio_mtouch (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data) { REQ (EIO_MTOUCH); req->ptr2 = addr; req->size = length; req->int1 = flags; SEND; } eio_req *eio_mlock (void *addr, size_t length, int pri, eio_cb cb, void *data) { REQ (EIO_MLOCK); req->ptr2 = addr; req->size = length; SEND; } eio_req *eio_mlockall (int flags, int pri, eio_cb cb, void *data) { REQ (EIO_MLOCKALL); req->int1 = flags; SEND; } eio_req *eio_fallocate (int fd, int mode, off_t offset, size_t len, int pri, eio_cb cb, void *data) { REQ (EIO_FALLOCATE); req->int1 = fd; req->int2 = mode; req->offs = offset; req->size = len; SEND; } eio_req *eio_close (int fd, int pri, eio_cb cb, void *data) { REQ (EIO_CLOSE); req->int1 
= fd; SEND; } eio_req *eio_readahead (int fd, off_t offset, size_t length, int pri, eio_cb cb, void *data) { REQ (EIO_READAHEAD); req->int1 = fd; req->offs = offset; req->size = length; SEND; } eio_req *eio_seek (int fd, off_t offset, int whence, int pri, eio_cb cb, void *data) { REQ (EIO_SEEK); req->int1 = fd; req->offs = offset; req->int2 = whence; SEND; } eio_req *eio_read (int fd, void *buf, size_t length, off_t offset, int pri, eio_cb cb, void *data) { REQ (EIO_READ); req->int1 = fd; req->offs = offset; req->size = length; req->ptr2 = buf; SEND; } eio_req *eio_write (int fd, void *buf, size_t length, off_t offset, int pri, eio_cb cb, void *data) { REQ (EIO_WRITE); req->int1 = fd; req->offs = offset; req->size = length; req->ptr2 = buf; SEND; } eio_req *eio_fstat (int fd, int pri, eio_cb cb, void *data) { REQ (EIO_FSTAT); req->int1 = fd; SEND; } eio_req *eio_fstatvfs (int fd, int pri, eio_cb cb, void *data) { REQ (EIO_FSTATVFS); req->int1 = fd; SEND; } eio_req *eio_futime (int fd, double atime, double mtime, int pri, eio_cb cb, void *data) { REQ (EIO_FUTIME); req->int1 = fd; req->nv1 = atime; req->nv2 = mtime; SEND; } eio_req *eio_ftruncate (int fd, off_t offset, int pri, eio_cb cb, void *data) { REQ (EIO_FTRUNCATE); req->int1 = fd; req->offs = offset; SEND; } eio_req *eio_fchmod (int fd, mode_t mode, int pri, eio_cb cb, void *data) { REQ (EIO_FCHMOD); req->int1 = fd; req->int2 = (long)mode; SEND; } eio_req *eio_fchown (int fd, eio_uid_t uid, eio_gid_t gid, int pri, eio_cb cb, void *data) { REQ (EIO_FCHOWN); req->int1 = fd; req->int2 = (long)uid; req->int3 = (long)gid; SEND; } eio_req *eio_dup2 (int fd, int fd2, int pri, eio_cb cb, void *data) { REQ (EIO_DUP2); req->int1 = fd; req->int2 = fd2; SEND; } eio_req *eio_sendfile (int out_fd, int in_fd, off_t in_offset, size_t length, int pri, eio_cb cb, void *data) { REQ (EIO_SENDFILE); req->int1 = out_fd; req->int2 = in_fd; req->offs = in_offset; req->size = length; SEND; } eio_req *eio_open (const char *path, int 
flags, mode_t mode, int pri, eio_cb cb, void *data) { REQ (EIO_OPEN); PATH; req->int1 = flags; req->int2 = (long)mode; SEND; } eio_req *eio_utime (const char *path, double atime, double mtime, int pri, eio_cb cb, void *data) { REQ (EIO_UTIME); PATH; req->nv1 = atime; req->nv2 = mtime; SEND; } eio_req *eio_truncate (const char *path, off_t offset, int pri, eio_cb cb, void *data) { REQ (EIO_TRUNCATE); PATH; req->offs = offset; SEND; } eio_req *eio_chown (const char *path, eio_uid_t uid, eio_gid_t gid, int pri, eio_cb cb, void *data) { REQ (EIO_CHOWN); PATH; req->int2 = (long)uid; req->int3 = (long)gid; SEND; } eio_req *eio_chmod (const char *path, mode_t mode, int pri, eio_cb cb, void *data) { REQ (EIO_CHMOD); PATH; req->int2 = (long)mode; SEND; } eio_req *eio_mkdir (const char *path, mode_t mode, int pri, eio_cb cb, void *data) { REQ (EIO_MKDIR); PATH; req->int2 = (long)mode; SEND; } static eio_req * eio__1path (int type, const char *path, int pri, eio_cb cb, void *data) { REQ (type); PATH; SEND; } eio_req *eio_readlink (const char *path, int pri, eio_cb cb, void *data) { return eio__1path (EIO_READLINK, path, pri, cb, data); } eio_req *eio_realpath (const char *path, int pri, eio_cb cb, void *data) { return eio__1path (EIO_REALPATH, path, pri, cb, data); } eio_req *eio_stat (const char *path, int pri, eio_cb cb, void *data) { return eio__1path (EIO_STAT, path, pri, cb, data); } eio_req *eio_lstat (const char *path, int pri, eio_cb cb, void *data) { return eio__1path (EIO_LSTAT, path, pri, cb, data); } eio_req *eio_statvfs (const char *path, int pri, eio_cb cb, void *data) { return eio__1path (EIO_STATVFS, path, pri, cb, data); } eio_req *eio_unlink (const char *path, int pri, eio_cb cb, void *data) { return eio__1path (EIO_UNLINK, path, pri, cb, data); } eio_req *eio_rmdir (const char *path, int pri, eio_cb cb, void *data) { return eio__1path (EIO_RMDIR, path, pri, cb, data); } eio_req *eio_readdir (const char *path, int flags, int pri, eio_cb cb, void *data) { REQ 
(EIO_READDIR); PATH; req->int1 = flags; SEND; } eio_req *eio_mknod (const char *path, mode_t mode, dev_t dev, int pri, eio_cb cb, void *data) { REQ (EIO_MKNOD); PATH; req->int2 = (long)mode; req->offs = (off_t)dev; SEND; } static eio_req * eio__2path (int type, const char *path, const char *new_path, int pri, eio_cb cb, void *data) { REQ (type); PATH; req->flags |= EIO_FLAG_PTR2_FREE; req->ptr2 = strdup (new_path); if (!req->ptr2) { eio_api_destroy (req); return 0; } SEND; } eio_req *eio_link (const char *path, const char *new_path, int pri, eio_cb cb, void *data) { return eio__2path (EIO_LINK, path, new_path, pri, cb, data); } eio_req *eio_symlink (const char *path, const char *new_path, int pri, eio_cb cb, void *data) { return eio__2path (EIO_SYMLINK, path, new_path, pri, cb, data); } eio_req *eio_rename (const char *path, const char *new_path, int pri, eio_cb cb, void *data) { return eio__2path (EIO_RENAME, path, new_path, pri, cb, data); } eio_req *eio_custom (void (*execute)(eio_req *), int pri, eio_cb cb, void *data) { REQ (EIO_CUSTOM); req->feed = execute; SEND; } #endif eio_req *eio_grp (eio_cb cb, void *data) { const int pri = EIO_PRI_MAX; REQ (EIO_GROUP); SEND; } #undef REQ #undef PATH #undef SEND /*****************************************************************************/ /* grp functions */ void eio_grp_feed (eio_req *grp, void (*feed)(eio_req *req), int limit) { grp->int2 = limit; grp->feed = feed; grp_try_feed (grp); } void eio_grp_limit (eio_req *grp, int limit) { grp->int2 = limit; grp_try_feed (grp); } void eio_grp_add (eio_req *grp, eio_req *req) { assert (("cannot add requests to IO::AIO::GRP after the group finished", grp->int1 != 2)); grp->flags |= EIO_FLAG_GROUPADD; ++grp->size; req->grp = grp; req->grp_prev = 0; req->grp_next = grp->grp_first; if (grp->grp_first) grp->grp_first->grp_prev = req; grp->grp_first = req; } /*****************************************************************************/ /* misc garbage */ eio_ssize_t 
eio_sendfile_sync (int ofd, int ifd, off_t offset, size_t count) { return eio__sendfile (ofd, ifd, offset, count); } IO-AIO-4.18/libeio/ecb.h0000644000000000000000000005772112034572206013243 0ustar rootroot/* * libecb - http://software.schmorp.de/pkg/libecb * * Copyright (©) 2009-2012 Marc Alexander Lehmann * Copyright (©) 2011 Emanuele Giaquinta * All rights reserved. * * Redistribution and use in source and binary forms, with or without modifica- * tion, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef ECB_H #define ECB_H /* 16 bits major, 16 bits minor */ #define ECB_VERSION 0x00010002 #ifdef _WIN32 typedef signed char int8_t; typedef unsigned char uint8_t; typedef signed short int16_t; typedef unsigned short uint16_t; typedef signed int int32_t; typedef unsigned int uint32_t; #if __GNUC__ typedef signed long long int64_t; typedef unsigned long long uint64_t; #else /* _MSC_VER || __BORLANDC__ */ typedef signed __int64 int64_t; typedef unsigned __int64 uint64_t; #endif #ifdef _WIN64 #define ECB_PTRSIZE 8 typedef uint64_t uintptr_t; typedef int64_t intptr_t; #else #define ECB_PTRSIZE 4 typedef uint32_t uintptr_t; typedef int32_t intptr_t; #endif #else #include #if UINTMAX_MAX > 0xffffffffU #define ECB_PTRSIZE 8 #else #define ECB_PTRSIZE 4 #endif #endif /* many compilers define _GNUC_ to some versions but then only implement * what their idiot authors think are the "more important" extensions, * causing enormous grief in return for some better fake benchmark numbers. * or so. * we try to detect these and simply assume they are not gcc - if they have * an issue with that they should have done it right in the first place. 
*/ #ifndef ECB_GCC_VERSION #if !defined __GNUC_MINOR__ || defined __INTEL_COMPILER || defined __SUNPRO_C || defined __SUNPRO_CC || defined __llvm__ || defined __clang__ #define ECB_GCC_VERSION(major,minor) 0 #else #define ECB_GCC_VERSION(major,minor) (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))) #endif #endif #define ECB_C (__STDC__+0) /* this assumes that __STDC__ is either empty or a number */ #define ECB_C99 (__STDC_VERSION__ >= 199901L) #define ECB_C11 (__STDC_VERSION__ >= 201112L) #define ECB_CPP (__cplusplus+0) #define ECB_CPP11 (__cplusplus >= 201103L) #if ECB_CPP #define ECB_EXTERN_C extern "C" #define ECB_EXTERN_C_BEG ECB_EXTERN_C { #define ECB_EXTERN_C_END } #else #define ECB_EXTERN_C extern #define ECB_EXTERN_C_BEG #define ECB_EXTERN_C_END #endif /*****************************************************************************/ /* ECB_NO_THREADS - ecb is not used by multiple threads, ever */ /* ECB_NO_SMP - ecb might be used in multiple threads, but only on a single cpu */ #if ECB_NO_THREADS #define ECB_NO_SMP 1 #endif #if ECB_NO_SMP #define ECB_MEMORY_FENCE do { } while (0) #endif #ifndef ECB_MEMORY_FENCE #if ECB_GCC_VERSION(2,5) || defined __INTEL_COMPILER || (__llvm__ && __GNUC__) || __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110 #if __i386 || __i386__ #define ECB_MEMORY_FENCE __asm__ __volatile__ ("lock; orb $0, -1(%%esp)" : : : "memory") #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("" : : : "memory") #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("") #elif __amd64 || __amd64__ || __x86_64 || __x86_64__ #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mfence" : : : "memory") #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("" : : : "memory") #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("") #elif __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__ #define ECB_MEMORY_FENCE __asm__ __volatile__ ("sync" : : : "memory") #elif defined __ARM_ARCH_6__ || defined __ARM_ARCH_6J__ \ || defined 
__ARM_ARCH_6K__ || defined __ARM_ARCH_6ZK__ #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mcr p15,0,%0,c7,c10,5" : : "r" (0) : "memory") #elif defined __ARM_ARCH_7__ || defined __ARM_ARCH_7A__ \ || defined __ARM_ARCH_7M__ || defined __ARM_ARCH_7R__ #define ECB_MEMORY_FENCE __asm__ __volatile__ ("dmb" : : : "memory") #elif __sparc || __sparc__ #define ECB_MEMORY_FENCE __asm__ __volatile__ ("membar #LoadStore | #LoadLoad | #StoreStore | #StoreLoad" : : : "memory") #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("membar #LoadStore | #LoadLoad" : : : "memory") #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("membar #LoadStore | #StoreStore") #elif defined __s390__ || defined __s390x__ #define ECB_MEMORY_FENCE __asm__ __volatile__ ("bcr 15,0" : : : "memory") #elif defined __mips__ #define ECB_MEMORY_FENCE __asm__ __volatile__ ("sync" : : : "memory") #elif defined __alpha__ #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mb" : : : "memory") #elif defined __hppa__ #define ECB_MEMORY_FENCE __asm__ __volatile__ ("" : : : "memory") #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("") #elif defined __ia64__ #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mf" : : : "memory") #endif #endif #endif #ifndef ECB_MEMORY_FENCE #if ECB_GCC_VERSION(4,7) /* see comment below (stdatomic.h) about the C11 memory model. */ #define ECB_MEMORY_FENCE __atomic_thread_fence (__ATOMIC_SEQ_CST) /* The __has_feature syntax from clang is so misdesigned that we cannot use it * without risking compile time errors with other compilers. We *could* * define our own ecb_clang_has_feature, but I just can't be bothered to work * around this shit time and again. * #elif defined __clang && __has_feature (cxx_atomic) * // see comment below (stdatomic.h) about the C11 memory model. 
* #define ECB_MEMORY_FENCE __c11_atomic_thread_fence (__ATOMIC_SEQ_CST) */ #elif ECB_GCC_VERSION(4,4) || defined __INTEL_COMPILER || defined __clang__ #define ECB_MEMORY_FENCE __sync_synchronize () #elif _MSC_VER >= 1400 /* VC++ 2005 */ #pragma intrinsic(_ReadBarrier,_WriteBarrier,_ReadWriteBarrier) #define ECB_MEMORY_FENCE _ReadWriteBarrier () #define ECB_MEMORY_FENCE_ACQUIRE _ReadWriteBarrier () /* according to msdn, _ReadBarrier is not a load fence */ #define ECB_MEMORY_FENCE_RELEASE _WriteBarrier () #elif defined _WIN32 #include #define ECB_MEMORY_FENCE MemoryBarrier () /* actually just xchg on x86... scary */ #elif __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110 #include #define ECB_MEMORY_FENCE __machine_rw_barrier () #define ECB_MEMORY_FENCE_ACQUIRE __machine_r_barrier () #define ECB_MEMORY_FENCE_RELEASE __machine_w_barrier () #elif __xlC__ #define ECB_MEMORY_FENCE __sync () #endif #endif #ifndef ECB_MEMORY_FENCE #if ECB_C11 && !defined __STDC_NO_ATOMICS__ /* we assume that these memory fences work on all variables/all memory accesses, */ /* not just C11 atomics and atomic accesses */ #include /* Unfortunately, neither gcc 4.7 nor clang 3.1 generate any instructions for */ /* any fence other than seq_cst, which isn't very efficient for us. */ /* Why that is, we don't know - either the C11 memory model is quite useless */ /* for most usages, or gcc and clang have a bug */ /* I *currently* lean towards the latter, and inefficiently implement */ /* all three of ecb's fences as a seq_cst fence */ #define ECB_MEMORY_FENCE atomic_thread_fence (memory_order_seq_cst) #endif #endif #ifndef ECB_MEMORY_FENCE #if !ECB_AVOID_PTHREADS /* * if you get undefined symbol references to pthread_mutex_lock, * or failure to find pthread.h, then you should implement * the ECB_MEMORY_FENCE operations for your cpu/compiler * OR provide pthread.h and link against the posix thread library * of your system. 
*/ #include #define ECB_NEEDS_PTHREADS 1 #define ECB_MEMORY_FENCE_NEEDS_PTHREADS 1 static pthread_mutex_t ecb_mf_lock = PTHREAD_MUTEX_INITIALIZER; #define ECB_MEMORY_FENCE do { pthread_mutex_lock (&ecb_mf_lock); pthread_mutex_unlock (&ecb_mf_lock); } while (0) #endif #endif #if !defined ECB_MEMORY_FENCE_ACQUIRE && defined ECB_MEMORY_FENCE #define ECB_MEMORY_FENCE_ACQUIRE ECB_MEMORY_FENCE #endif #if !defined ECB_MEMORY_FENCE_RELEASE && defined ECB_MEMORY_FENCE #define ECB_MEMORY_FENCE_RELEASE ECB_MEMORY_FENCE #endif /*****************************************************************************/ #if __cplusplus #define ecb_inline static inline #elif ECB_GCC_VERSION(2,5) #define ecb_inline static __inline__ #elif ECB_C99 #define ecb_inline static inline #else #define ecb_inline static #endif #if ECB_GCC_VERSION(3,3) #define ecb_restrict __restrict__ #elif ECB_C99 #define ecb_restrict restrict #else #define ecb_restrict #endif typedef int ecb_bool; #define ECB_CONCAT_(a, b) a ## b #define ECB_CONCAT(a, b) ECB_CONCAT_(a, b) #define ECB_STRINGIFY_(a) # a #define ECB_STRINGIFY(a) ECB_STRINGIFY_(a) #define ecb_function_ ecb_inline #if ECB_GCC_VERSION(3,1) #define ecb_attribute(attrlist) __attribute__(attrlist) #define ecb_is_constant(expr) __builtin_constant_p (expr) #define ecb_expect(expr,value) __builtin_expect ((expr),(value)) #define ecb_prefetch(addr,rw,locality) __builtin_prefetch (addr, rw, locality) #else #define ecb_attribute(attrlist) #define ecb_is_constant(expr) 0 #define ecb_expect(expr,value) (expr) #define ecb_prefetch(addr,rw,locality) #endif /* no emulation for ecb_decltype */ #if ECB_GCC_VERSION(4,5) #define ecb_decltype(x) __decltype(x) #elif ECB_GCC_VERSION(3,0) #define ecb_decltype(x) __typeof(x) #endif #define ecb_noinline ecb_attribute ((__noinline__)) #define ecb_unused ecb_attribute ((__unused__)) #define ecb_const ecb_attribute ((__const__)) #define ecb_pure ecb_attribute ((__pure__)) #if ECB_C11 #define ecb_noreturn _Noreturn #else #define 
ecb_noreturn ecb_attribute ((__noreturn__)) #endif #if ECB_GCC_VERSION(4,3) #define ecb_artificial ecb_attribute ((__artificial__)) #define ecb_hot ecb_attribute ((__hot__)) #define ecb_cold ecb_attribute ((__cold__)) #else #define ecb_artificial #define ecb_hot #define ecb_cold #endif /* put around conditional expressions if you are very sure that the */ /* expression is mostly true or mostly false. note that these return */ /* booleans, not the expression. */ #define ecb_expect_false(expr) ecb_expect (!!(expr), 0) #define ecb_expect_true(expr) ecb_expect (!!(expr), 1) /* for compatibility to the rest of the world */ #define ecb_likely(expr) ecb_expect_true (expr) #define ecb_unlikely(expr) ecb_expect_false (expr) /* count trailing zero bits and count # of one bits */ #if ECB_GCC_VERSION(3,4) /* we assume int == 32 bit, long == 32 or 64 bit and long long == 64 bit */ #define ecb_ld32(x) (__builtin_clz (x) ^ 31) #define ecb_ld64(x) (__builtin_clzll (x) ^ 63) #define ecb_ctz32(x) __builtin_ctz (x) #define ecb_ctz64(x) __builtin_ctzll (x) #define ecb_popcount32(x) __builtin_popcount (x) /* no popcountll */ #else ecb_function_ int ecb_ctz32 (uint32_t x) ecb_const; ecb_function_ int ecb_ctz32 (uint32_t x) { int r = 0; x &= ~x + 1; /* this isolates the lowest bit */ #if ECB_branchless_on_i386 r += !!(x & 0xaaaaaaaa) << 0; r += !!(x & 0xcccccccc) << 1; r += !!(x & 0xf0f0f0f0) << 2; r += !!(x & 0xff00ff00) << 3; r += !!(x & 0xffff0000) << 4; #else if (x & 0xaaaaaaaa) r += 1; if (x & 0xcccccccc) r += 2; if (x & 0xf0f0f0f0) r += 4; if (x & 0xff00ff00) r += 8; if (x & 0xffff0000) r += 16; #endif return r; } ecb_function_ int ecb_ctz64 (uint64_t x) ecb_const; ecb_function_ int ecb_ctz64 (uint64_t x) { int shift = x & 0xffffffffU ? 
0 : 32; return ecb_ctz32 (x >> shift) + shift; } ecb_function_ int ecb_popcount32 (uint32_t x) ecb_const; ecb_function_ int ecb_popcount32 (uint32_t x) { x -= (x >> 1) & 0x55555555; x = ((x >> 2) & 0x33333333) + (x & 0x33333333); x = ((x >> 4) + x) & 0x0f0f0f0f; x *= 0x01010101; return x >> 24; } ecb_function_ int ecb_ld32 (uint32_t x) ecb_const; ecb_function_ int ecb_ld32 (uint32_t x) { int r = 0; if (x >> 16) { x >>= 16; r += 16; } if (x >> 8) { x >>= 8; r += 8; } if (x >> 4) { x >>= 4; r += 4; } if (x >> 2) { x >>= 2; r += 2; } if (x >> 1) { r += 1; } return r; } ecb_function_ int ecb_ld64 (uint64_t x) ecb_const; ecb_function_ int ecb_ld64 (uint64_t x) { int r = 0; if (x >> 32) { x >>= 32; r += 32; } return r + ecb_ld32 (x); } #endif ecb_function_ ecb_bool ecb_is_pot32 (uint32_t x) ecb_const; ecb_function_ ecb_bool ecb_is_pot32 (uint32_t x) { return !(x & (x - 1)); } ecb_function_ ecb_bool ecb_is_pot64 (uint64_t x) ecb_const; ecb_function_ ecb_bool ecb_is_pot64 (uint64_t x) { return !(x & (x - 1)); } ecb_function_ uint8_t ecb_bitrev8 (uint8_t x) ecb_const; ecb_function_ uint8_t ecb_bitrev8 (uint8_t x) { return ( (x * 0x0802U & 0x22110U) | (x * 0x8020U & 0x88440U)) * 0x10101U >> 16; } ecb_function_ uint16_t ecb_bitrev16 (uint16_t x) ecb_const; ecb_function_ uint16_t ecb_bitrev16 (uint16_t x) { x = ((x >> 1) & 0x5555) | ((x & 0x5555) << 1); x = ((x >> 2) & 0x3333) | ((x & 0x3333) << 2); x = ((x >> 4) & 0x0f0f) | ((x & 0x0f0f) << 4); x = ( x >> 8 ) | ( x << 8); return x; } ecb_function_ uint32_t ecb_bitrev32 (uint32_t x) ecb_const; ecb_function_ uint32_t ecb_bitrev32 (uint32_t x) { x = ((x >> 1) & 0x55555555) | ((x & 0x55555555) << 1); x = ((x >> 2) & 0x33333333) | ((x & 0x33333333) << 2); x = ((x >> 4) & 0x0f0f0f0f) | ((x & 0x0f0f0f0f) << 4); x = ((x >> 8) & 0x00ff00ff) | ((x & 0x00ff00ff) << 8); x = ( x >> 16 ) | ( x << 16); return x; } /* popcount64 is only available on 64 bit cpus as gcc builtin */ /* so for this version we are lazy */ ecb_function_ int 
ecb_popcount64 (uint64_t x) ecb_const; ecb_function_ int ecb_popcount64 (uint64_t x) { return ecb_popcount32 (x) + ecb_popcount32 (x >> 32); } ecb_inline uint8_t ecb_rotl8 (uint8_t x, unsigned int count) ecb_const; ecb_inline uint8_t ecb_rotr8 (uint8_t x, unsigned int count) ecb_const; ecb_inline uint16_t ecb_rotl16 (uint16_t x, unsigned int count) ecb_const; ecb_inline uint16_t ecb_rotr16 (uint16_t x, unsigned int count) ecb_const; ecb_inline uint32_t ecb_rotl32 (uint32_t x, unsigned int count) ecb_const; ecb_inline uint32_t ecb_rotr32 (uint32_t x, unsigned int count) ecb_const; ecb_inline uint64_t ecb_rotl64 (uint64_t x, unsigned int count) ecb_const; ecb_inline uint64_t ecb_rotr64 (uint64_t x, unsigned int count) ecb_const; ecb_inline uint8_t ecb_rotl8 (uint8_t x, unsigned int count) { return (x >> ( 8 - count)) | (x << count); } ecb_inline uint8_t ecb_rotr8 (uint8_t x, unsigned int count) { return (x << ( 8 - count)) | (x >> count); } ecb_inline uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (16 - count)) | (x << count); } ecb_inline uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (16 - count)) | (x >> count); } ecb_inline uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (32 - count)) | (x << count); } ecb_inline uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (32 - count)) | (x >> count); } ecb_inline uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (64 - count)) | (x << count); } ecb_inline uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (64 - count)) | (x >> count); } #if ECB_GCC_VERSION(4,3) #define ecb_bswap16(x) (__builtin_bswap32 (x) >> 16) #define ecb_bswap32(x) __builtin_bswap32 (x) #define ecb_bswap64(x) __builtin_bswap64 (x) #else ecb_function_ uint16_t ecb_bswap16 (uint16_t x) ecb_const; ecb_function_ uint16_t ecb_bswap16 (uint16_t x) { return ecb_rotl16 (x, 8); } ecb_function_ uint32_t ecb_bswap32 (uint32_t x) ecb_const; 
ecb_function_ uint32_t ecb_bswap32 (uint32_t x) { return (((uint32_t)ecb_bswap16 (x)) << 16) | ecb_bswap16 (x >> 16); } ecb_function_ uint64_t ecb_bswap64 (uint64_t x) ecb_const; ecb_function_ uint64_t ecb_bswap64 (uint64_t x) { return (((uint64_t)ecb_bswap32 (x)) << 32) | ecb_bswap32 (x >> 32); } #endif #if ECB_GCC_VERSION(4,5) #define ecb_unreachable() __builtin_unreachable () #else /* this seems to work fine, but gcc always emits a warning for it :/ */ ecb_inline void ecb_unreachable (void) ecb_noreturn; ecb_inline void ecb_unreachable (void) { } #endif /* try to tell the compiler that some condition is definitely true */ #define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0 ecb_inline unsigned char ecb_byteorder_helper (void) ecb_const; ecb_inline unsigned char ecb_byteorder_helper (void) { /* the union code still generates code under pressure in gcc, */ /* but less than using pointers, and always seems to */ /* successfully return a constant. */ /* the reason why we have this horrible preprocessor mess */ /* is to avoid it in all cases, at least on common architectures */ /* or when using a recent enough gcc version (>= 4.6) */ #if __i386 || __i386__ || _M_X86 || __amd64 || __amd64__ || _M_X64 return 0x44; #elif __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return 0x44; #elif __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return 0x11; #else union { uint32_t i; uint8_t c; } u = { 0x11223344 }; return u.c; #endif } ecb_inline ecb_bool ecb_big_endian (void) ecb_const; ecb_inline ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11; } ecb_inline ecb_bool ecb_little_endian (void) ecb_const; ecb_inline ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44; } #if ECB_GCC_VERSION(3,0) || ECB_C99 #define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0)) #else #define ecb_mod(m,n) ((m) < 0 ? 
((n) - 1 - ((-1 - (m)) % (n))) : ((m) % (n))) #endif #if __cplusplus template static inline T ecb_div_rd (T val, T div) { return val < 0 ? - ((-val + div - 1) / div) : (val ) / div; } template static inline T ecb_div_ru (T val, T div) { return val < 0 ? - ((-val ) / div) : (val + div - 1) / div; } #else #define ecb_div_rd(val,div) ((val) < 0 ? - ((-(val) + (div) - 1) / (div)) : ((val) ) / (div)) #define ecb_div_ru(val,div) ((val) < 0 ? - ((-(val) ) / (div)) : ((val) + (div) - 1) / (div)) #endif #if ecb_cplusplus_does_not_suck /* does not work for local types (http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2657.htm) */ template static inline int ecb_array_length (const T (&arr)[N]) { return N; } #else #define ecb_array_length(name) (sizeof (name) / sizeof (name [0])) #endif /*******************************************************************************/ /* floating point stuff, can be disabled by defining ECB_NO_LIBM */ /* basically, everything uses "ieee pure-endian" floating point numbers */ /* the only noteworthy exception is ancient armle, which uses order 43218765 */ #if 0 \ || __i386 || __i386__ \ || __amd64 || __amd64__ || __x86_64 || __x86_64__ \ || __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__ \ || defined __arm__ && defined __ARM_EABI__ \ || defined __s390__ || defined __s390x__ \ || defined __mips__ \ || defined __alpha__ \ || defined __hppa__ \ || defined __ia64__ \ || defined _M_IX86 || defined _M_AMD64 || defined _M_IA64 #define ECB_STDFP 1 #include /* for memcpy */ #else #define ECB_STDFP 0 #include /* for frexp*, ldexp* */ #endif #ifndef ECB_NO_LIBM /* convert a float to ieee single/binary32 */ ecb_function_ uint32_t ecb_float_to_binary32 (float x) ecb_const; ecb_function_ uint32_t ecb_float_to_binary32 (float x) { uint32_t r; #if ECB_STDFP memcpy (&r, &x, 4); #else /* slow emulation, works for anything but -0 */ uint32_t m; int e; if (x == 0e0f ) return 0x00000000U; if (x > +3.40282346638528860e+38f) return 0x7f800000U; if (x < 
-3.40282346638528860e+38f) return 0xff800000U; if (x != x ) return 0x7fbfffffU; m = frexpf (x, &e) * 0x1000000U; r = m & 0x80000000U; if (r) m = -m; if (e <= -126) { m &= 0xffffffU; m >>= (-125 - e); e = -126; } r |= (e + 126) << 23; r |= m & 0x7fffffU; #endif return r; } /* converts an ieee single/binary32 to a float */ ecb_function_ float ecb_binary32_to_float (uint32_t x) ecb_const; ecb_function_ float ecb_binary32_to_float (uint32_t x) { float r; #if ECB_STDFP memcpy (&r, &x, 4); #else /* emulation, only works for normals and subnormals and +0 */ int neg = x >> 31; int e = (x >> 23) & 0xffU; x &= 0x7fffffU; if (e) x |= 0x800000U; else e = 1; /* we distrust ldexpf a bit and do the 2**-24 scaling by an extra multiply */ r = ldexpf (x * (0.5f / 0x800000U), e - 126); r = neg ? -r : r; #endif return r; } /* convert a double to ieee double/binary64 */ ecb_function_ uint64_t ecb_double_to_binary64 (double x) ecb_const; ecb_function_ uint64_t ecb_double_to_binary64 (double x) { uint64_t r; #if ECB_STDFP memcpy (&r, &x, 8); #else /* slow emulation, works for anything but -0 */ uint64_t m; int e; if (x == 0e0 ) return 0x0000000000000000U; if (x > +1.79769313486231470e+308) return 0x7ff0000000000000U; if (x < -1.79769313486231470e+308) return 0xfff0000000000000U; if (x != x ) return 0X7ff7ffffffffffffU; m = frexp (x, &e) * 0x20000000000000U; r = m & 0x8000000000000000;; if (r) m = -m; if (e <= -1022) { m &= 0x1fffffffffffffU; m >>= (-1021 - e); e = -1022; } r |= ((uint64_t)(e + 1022)) << 52; r |= m & 0xfffffffffffffU; #endif return r; } /* converts an ieee double/binary64 to a double */ ecb_function_ double ecb_binary64_to_double (uint64_t x) ecb_const; ecb_function_ double ecb_binary64_to_double (uint64_t x) { double r; #if ECB_STDFP memcpy (&r, &x, 8); #else /* emulation, only works for normals and subnormals and +0 */ int neg = x >> 63; int e = (x >> 52) & 0x7ffU; x &= 0xfffffffffffffU; if (e) x |= 0x10000000000000U; else e = 1; /* we distrust ldexp a bit and do the 
2**-53 scaling by an extra multiply */ r = ldexp (x * (0.5 / 0x10000000000000U), e - 1022); r = neg ? -r : r; #endif return r; } #endif #endif IO-AIO-4.18/libeio/libeio.m40000644000000000000000000001340311762471705014044 0ustar rootrootdnl openbsd in it's neverending brokenness requires stdint.h for intptr_t, dnl but that header isn't very portable... AC_CHECK_HEADERS([stdint.h sys/syscall.h sys/prctl.h]) AC_SEARCH_LIBS( pthread_create, [pthread pthreads pthreadVC2], , [AC_MSG_ERROR(pthread functions not found)] ) AC_CACHE_CHECK(for utimes, ac_cv_utimes, [AC_LINK_IFELSE([[ #include #include #include struct timeval tv[2]; int res; int main (void) { res = utimes ("/", tv); return 0; } ]],ac_cv_utimes=yes,ac_cv_utimes=no)]) test $ac_cv_utimes = yes && AC_DEFINE(HAVE_UTIMES, 1, utimes(2) is available) AC_CACHE_CHECK(for futimes, ac_cv_futimes, [AC_LINK_IFELSE([[ #include #include #include struct timeval tv[2]; int res; int fd; int main (void) { res = futimes (fd, tv); return 0; } ]],ac_cv_futimes=yes,ac_cv_futimes=no)]) test $ac_cv_futimes = yes && AC_DEFINE(HAVE_FUTIMES, 1, futimes(2) is available) AC_CACHE_CHECK(for readahead, ac_cv_readahead, [AC_LINK_IFELSE([ #include int main (void) { int fd = 0; size_t count = 2; ssize_t res; res = readahead (fd, 0, count); return 0; } ],ac_cv_readahead=yes,ac_cv_readahead=no)]) test $ac_cv_readahead = yes && AC_DEFINE(HAVE_READAHEAD, 1, readahead(2) is available (linux)) AC_CACHE_CHECK(for fdatasync, ac_cv_fdatasync, [AC_LINK_IFELSE([ #include int main (void) { int fd = 0; fdatasync (fd); return 0; } ],ac_cv_fdatasync=yes,ac_cv_fdatasync=no)]) test $ac_cv_fdatasync = yes && AC_DEFINE(HAVE_FDATASYNC, 1, fdatasync(2) is available) AC_CACHE_CHECK(for sendfile, ac_cv_sendfile, [AC_LINK_IFELSE([ # include #if __linux # include #elif __FreeBSD__ || defined __APPLE__ # include # include #elif __hpux # include #else # error unsupported architecture #endif int main (void) { int fd = 0; off_t offset = 1; size_t count = 2; ssize_t res; #if 
__linux res = sendfile (fd, fd, offset, count); #elif __FreeBSD__ res = sendfile (fd, fd, offset, count, 0, &offset, 0); #elif __hpux res = sendfile (fd, fd, offset, count, 0, 0); #endif return 0; } ],ac_cv_sendfile=yes,ac_cv_sendfile=no)]) test $ac_cv_sendfile = yes && AC_DEFINE(HAVE_SENDFILE, 1, sendfile(2) is available and supported) AC_CACHE_CHECK(for sync_file_range, ac_cv_sync_file_range, [AC_LINK_IFELSE([ #include int main (void) { int fd = 0; off64_t offset = 1; off64_t nbytes = 1; unsigned int flags = SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER; ssize_t res; res = sync_file_range (fd, offset, nbytes, flags); return 0; } ],ac_cv_sync_file_range=yes,ac_cv_sync_file_range=no)]) test $ac_cv_sync_file_range = yes && AC_DEFINE(HAVE_SYNC_FILE_RANGE, 1, sync_file_range(2) is available) AC_CACHE_CHECK(for fallocate, ac_cv_linux_fallocate, [AC_LINK_IFELSE([ #include int main (void) { int fd = 0; int mode = 0; off_t offset = 1; off_t len = 1; int res; res = fallocate (fd, mode, offset, len); return 0; } ],ac_cv_linux_fallocate=yes,ac_cv_linux_fallocate=no)]) test $ac_cv_linux_fallocate = yes && AC_DEFINE(HAVE_LINUX_FALLOCATE, 1, fallocate(2) is available) AC_CACHE_CHECK(for sys_syncfs, ac_cv_sys_syncfs, [AC_LINK_IFELSE([ #include #include int main (void) { int res = syscall (__NR_syncfs, (int)0); } ],ac_cv_sys_syncfs=yes,ac_cv_sys_syncfs=no)]) test $ac_cv_sys_syncfs = yes && AC_DEFINE(HAVE_SYS_SYNCFS, 1, syscall(__NR_syncfs) is available) AC_CACHE_CHECK(for prctl_set_name, ac_cv_prctl_set_name, [AC_LINK_IFELSE([ #include int main (void) { char name[] = "test123"; int res = prctl (PR_SET_NAME, (unsigned long)name, 0, 0, 0); } ],ac_cv_prctl_set_name=yes,ac_cv_prctl_set_name=no)]) test $ac_cv_prctl_set_name = yes && AC_DEFINE(HAVE_PRCTL_SET_NAME, 1, prctl(PR_SET_NAME) is available) dnl ############################################################################# dnl # these checks exist for the benefit of IO::AIO dnl at least uclibc 
defines _POSIX_ADVISORY_INFO without *any* of the required dnl functionality actually being present. ugh. AC_CACHE_CHECK(for posix_madvise, ac_cv_posix_madvise, [AC_LINK_IFELSE([ #include int main (void) { int res = posix_madvise ((void *)0, (size_t)0, POSIX_MADV_NORMAL); int a = POSIX_MADV_SEQUENTIAL; int b = POSIX_MADV_RANDOM; int c = POSIX_MADV_WILLNEED; int d = POSIX_MADV_DONTNEED; return 0; } ],ac_cv_posix_madvise=yes,ac_cv_posix_madvise=no)]) test $ac_cv_posix_madvise = yes && AC_DEFINE(HAVE_POSIX_MADVISE, 1, posix_madvise(2) is available) AC_CACHE_CHECK(for posix_fadvise, ac_cv_posix_fadvise, [AC_LINK_IFELSE([ #define _XOPEN_SOURCE 600 #include int main (void) { int res = posix_fadvise ((int)0, (off_t)0, (off_t)0, POSIX_FADV_NORMAL); int a = POSIX_FADV_SEQUENTIAL; int b = POSIX_FADV_NOREUSE; int c = POSIX_FADV_RANDOM; int d = POSIX_FADV_WILLNEED; int e = POSIX_FADV_DONTNEED; return 0; } ],ac_cv_posix_fadvise=yes,ac_cv_posix_fadvise=no)]) test $ac_cv_posix_fadvise = yes && AC_DEFINE(HAVE_POSIX_FADVISE, 1, posix_fadvise(2) is available) dnl lots of linux specifics AC_CHECK_HEADERS([linux/fs.h linux/fiemap.h]) AC_CACHE_CHECK([for splice, vmsplice and tee], ac_cv_linux_splice, [AC_LINK_IFELSE([ #include int main (void) { ssize_t res; res = splice ((int)0, (loff_t)0, (int)0, (loff_t *)0, (size_t)0, SPLICE_F_MOVE | SPLICE_F_NONBLOCK | SPLICE_F_MORE); res = tee ((int)0, (int)0, (size_t)0, SPLICE_F_NONBLOCK); res = vmsplice ((int)0, (struct iovec *)0, 0, SPLICE_F_NONBLOCK | SPLICE_F_GIFT); return 0; } ],ac_cv_linux_splice=yes,ac_cv_linux_splice=no)]) test $ac_cv_linux_splice = yes && AC_DEFINE(HAVE_LINUX_SPLICE, 1, splice/vmsplice/tee(2) are available) IO-AIO-4.18/schmorp.h0000644000000000000000000002427611737535644012737 0ustar rootroot#ifndef SCHMORP_PERL_H_ #define SCHMORP_PERL_H_ /* WARNING * This header file is a shared resource between many modules. * perl header files MUST already be included. 
*/ #include #include #if defined(WIN32 ) || defined(_MINIX) # define SCHMORP_H_PREFER_SELECT 1 #endif #if !SCHMORP_H_PREFER_SELECT # include #endif /* useful stuff, used by schmorp mostly */ #include "patchlevel.h" #define PERL_VERSION_ATLEAST(a,b,c) \ (PERL_REVISION > (a) \ || (PERL_REVISION == (a) \ && (PERL_VERSION > (b) \ || (PERL_VERSION == (b) && PERL_SUBVERSION >= (c))))) #ifndef PERL_MAGIC_ext # define PERL_MAGIC_ext '~' #endif #if !PERL_VERSION_ATLEAST (5,6,0) # ifndef PL_ppaddr # define PL_ppaddr ppaddr # endif # ifndef call_sv # define call_sv perl_call_sv # endif # ifndef get_sv # define get_sv perl_get_sv # endif # ifndef get_cv # define get_cv perl_get_cv # endif # ifndef IS_PADGV # define IS_PADGV(v) 0 # endif # ifndef IS_PADCONST # define IS_PADCONST(v) 0 # endif #endif /* use NV for 32 bit perls as it allows larger offsets */ #if IVSIZE >= 8 typedef IV VAL64; # define SvVAL64(sv) SvIV (sv) # define newSVval64(i64) newSViv (i64) #else typedef NV VAL64; # define SvVAL64(sv) SvNV (sv) # define newSVval64(i64) newSVnv (i64) #endif /* typemap for the above */ /* VAL64 T_VAL64 INPUT T_VAL64 $var = ($type)SvVAL64 ($arg); OUTPUT T_VAL64 $arg = newSVval64 ($var); */ /* 5.11 */ #ifndef CxHASARGS # define CxHASARGS(cx) (cx)->blk_sub.hasargs #endif /* 5.10.0 */ #ifndef SvREFCNT_inc_NN # define SvREFCNT_inc_NN(sv) SvREFCNT_inc (sv) #endif /* 5.8.8 */ #ifndef GV_NOTQUAL # define GV_NOTQUAL 0 #endif #ifndef newSV # define newSV(l) NEWSV(0,l) #endif #ifndef CvISXSUB_on # define CvISXSUB_on(cv) (void)cv #endif #ifndef CvISXSUB # define CvISXSUB(cv) (CvXSUB (cv) ? 
TRUE : FALSE) #endif #ifndef Newx # define Newx(ptr,nitems,type) New (0,ptr,nitems,type) #endif /* 5.8.7 */ #ifndef SvRV_set # define SvRV_set(s,v) SvRV(s) = (v) #endif static int s_signum (SV *sig) { #ifndef SIG_SIZE /* kudos to Slaven Rezic for the idea */ static char sig_size [] = { SIG_NUM }; # define SIG_SIZE (sizeof (sig_size) + 1) #endif dTHX; int signum; SvGETMAGIC (sig); for (signum = 1; signum < SIG_SIZE; ++signum) if (strEQ (SvPV_nolen (sig), PL_sig_name [signum])) return signum; signum = SvIV (sig); if (signum > 0 && signum < SIG_SIZE) return signum; return -1; } static int s_signum_croak (SV *sig) { int signum = s_signum (sig); if (signum < 0) { dTHX; croak ("%s: invalid signal name or number", SvPV_nolen (sig)); } return signum; } static int s_fileno (SV *fh, int wr) { dTHX; SvGETMAGIC (fh); if (SvROK (fh)) { fh = SvRV (fh); SvGETMAGIC (fh); } if (SvTYPE (fh) == SVt_PVGV) return PerlIO_fileno (wr ? IoOFP (sv_2io (fh)) : IoIFP (sv_2io (fh))); if (SvOK (fh) && (SvIV (fh) >= 0) && (SvIV (fh) < 0x7fffffffL)) return SvIV (fh); return -1; } static int s_fileno_croak (SV *fh, int wr) { int fd = s_fileno (fh, wr); if (fd < 0) { dTHX; croak ("%s: illegal fh argument, either not an OS file or read/write mode mismatch", SvPV_nolen (fh)); } return fd; } static SV * s_get_cv (SV *cb_sv) { dTHX; HV *st; GV *gvp; return (SV *)sv_2cv (cb_sv, &st, &gvp, 0); } static SV * s_get_cv_croak (SV *cb_sv) { SV *cv = s_get_cv (cb_sv); if (!cv) { dTHX; croak ("%s: callback must be a CODE reference or another callable object", SvPV_nolen (cb_sv)); } return cv; } /*****************************************************************************/ /* gensub: simple closure generation utility */ #define S_GENSUB_ARG CvXSUBANY (cv).any_ptr /* create a closure from XS, returns a code reference */ /* the arg can be accessed via GENSUB_ARG from the callback */ /* the callback must use dXSARGS/XSRETURN */ static SV * s_gensub (pTHX_ void (*xsub)(pTHX_ CV *), void *arg) { CV *cv = (CV *)newSV 
(0); sv_upgrade ((SV *)cv, SVt_PVCV); CvANON_on (cv); CvISXSUB_on (cv); CvXSUB (cv) = xsub; S_GENSUB_ARG = arg; return newRV_noinc ((SV *)cv); } /*****************************************************************************/ /* portable pipe/socketpair */ #ifdef USE_SOCKETS_AS_HANDLES # define S_TO_HANDLE(x) ((HANDLE)win32_get_osfhandle (x)) #else # define S_TO_HANDLE(x) ((HANDLE)x) #endif #ifdef _WIN32 /* taken almost verbatim from libev's ev_win32.c */ /* oh, the humanity! */ static int s_pipe (int filedes [2]) { dTHX; struct sockaddr_in addr = { 0 }; int addr_size = sizeof (addr); struct sockaddr_in adr2; int adr2_size = sizeof (adr2); SOCKET listener; SOCKET sock [2] = { -1, -1 }; if ((listener = socket (AF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET) return -1; addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl (INADDR_LOOPBACK); addr.sin_port = 0; if (bind (listener, (struct sockaddr *)&addr, addr_size)) goto fail; if (getsockname (listener, (struct sockaddr *)&addr, &addr_size)) goto fail; if (listen (listener, 1)) goto fail; if ((sock [0] = socket (AF_INET, SOCK_STREAM, 0)) == INVALID_SOCKET) goto fail; if (connect (sock [0], (struct sockaddr *)&addr, addr_size)) goto fail; if ((sock [1] = accept (listener, 0, 0)) < 0) goto fail; /* windows vista returns fantasy port numbers for getpeername. * example for two interconnected tcp sockets: * * (Socket::unpack_sockaddr_in getsockname $sock0)[0] == 53364 * (Socket::unpack_sockaddr_in getpeername $sock0)[0] == 53363 * (Socket::unpack_sockaddr_in getsockname $sock1)[0] == 53363 * (Socket::unpack_sockaddr_in getpeername $sock1)[0] == 53365 * * wow! tridirectional sockets! 
* * this way of checking ports seems to work: */ if (getpeername (sock [0], (struct sockaddr *)&addr, &addr_size)) goto fail; if (getsockname (sock [1], (struct sockaddr *)&adr2, &adr2_size)) goto fail; errno = WSAEINVAL; if (addr_size != adr2_size || addr.sin_addr.s_addr != adr2.sin_addr.s_addr /* just to be sure, I mean, it's windows */ || addr.sin_port != adr2.sin_port) goto fail; closesocket (listener); #ifdef USE_SOCKETS_AS_HANDLES /* when select isn't winsocket, we also expect socket, connect, accept etc. * to work on fds */ filedes [0] = sock [0]; filedes [1] = sock [1]; #else filedes [0] = _open_osfhandle (sock [0], 0); filedes [1] = _open_osfhandle (sock [1], 0); #endif return 0; fail: closesocket (listener); if (sock [0] != INVALID_SOCKET) closesocket (sock [0]); if (sock [1] != INVALID_SOCKET) closesocket (sock [1]); return -1; } #define s_socketpair(domain,type,protocol,filedes) s_pipe (filedes) static int s_fd_blocking (int fd, int blocking) { u_long nonblocking = !blocking; return ioctlsocket ((SOCKET)S_TO_HANDLE (fd), FIONBIO, &nonblocking); } #define s_fd_prepare(fd) s_fd_blocking (fd, 0) #else #define s_socketpair(domain,type,protocol,filedes) socketpair (domain, type, protocol, filedes) #define s_pipe(filedes) pipe (filedes) static int s_fd_blocking (int fd, int blocking) { return fcntl (fd, F_SETFL, blocking ? 
0 : O_NONBLOCK); } static int s_fd_prepare (int fd) { return s_fd_blocking (fd, 0) || fcntl (fd, F_SETFD, FD_CLOEXEC); } #endif #if __linux && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 7)) # define SCHMORP_H_HAVE_EVENTFD 1 /* our minimum requirement is glibc 2.7 which has the stub, but not the header */ # include # ifdef __cplusplus extern "C" { # endif int eventfd (unsigned int initval, int flags); # ifdef __cplusplus } # endif #else # define eventfd(initval,flags) -1 #endif typedef struct { int fd[2]; /* read, write fd, might be equal */ int len; /* write length (1 pipe/socket, 8 eventfd) */ } s_epipe; static int s_epipe_new (s_epipe *epp) { s_epipe ep; ep.fd [0] = ep.fd [1] = eventfd (0, 0); if (ep.fd [0] >= 0) { s_fd_prepare (ep.fd [0]); ep.len = 8; } else { if (s_pipe (ep.fd)) return -1; if (s_fd_prepare (ep.fd [0]) || s_fd_prepare (ep.fd [1])) { dTHX; close (ep.fd [0]); close (ep.fd [1]); return -1; } ep.len = 1; } *epp = ep; return 0; } static void s_epipe_destroy (s_epipe *epp) { dTHX; close (epp->fd [0]); if (epp->fd [1] != epp->fd [0]) close (epp->fd [1]); epp->len = 0; } static void s_epipe_signal (s_epipe *epp) { #ifdef _WIN32 /* perl overrides send with a function that crashes in other threads. * unfortunately, it overrides it with an argument-less macro, so * there is no way to force usage of the real send function. * incompetent windows programmers - is this redundant? 
*/ DWORD dummy; WriteFile (S_TO_HANDLE (epp->fd [1]), (LPCVOID)&dummy, 1, &dummy, 0); #else # if SCHMORP_H_HAVE_EVENTFD static uint64_t counter = 1; # else static char counter [8]; # endif /* some modules accept fd's from outside, support eventfd here */ if (write (epp->fd [1], &counter, epp->len) < 0 && errno == EINVAL && epp->len != 8) write (epp->fd [1], &counter, (epp->len = 8)); #endif } static void s_epipe_drain (s_epipe *epp) { dTHX; char buf [9]; #ifdef _WIN32 recv (epp->fd [0], buf, sizeof (buf), 0); #else read (epp->fd [0], buf, sizeof (buf)); #endif } /* like new, but dups over old */ static int s_epipe_renew (s_epipe *epp) { dTHX; s_epipe epn; if (epp->fd [1] != epp->fd [0]) close (epp->fd [1]); if (s_epipe_new (&epn)) return -1; if (epp->len) { if (dup2 (epn.fd [0], epp->fd [0]) < 0) croak ("unable to dup over old event pipe"); /* should not croak */ close (epn.fd [0]); if (epn.fd [0] == epn.fd [1]) epn.fd [1] = epp->fd [0]; epn.fd [0] = epp->fd [0]; } *epp = epn; return 0; } #define s_epipe_fd(epp) ((epp)->fd [0]) static int s_epipe_wait (s_epipe *epp) { dTHX; #if SCHMORP_H_PREFER_SELECT fd_set rfd; int fd = s_epipe_fd (epp); FD_ZERO (&rfd); FD_SET (fd, &rfd); return PerlSock_select (fd + 1, &rfd, 0, 0, 0); #else /* poll is preferable on posix systems */ struct pollfd pfd; pfd.fd = s_epipe_fd (epp); pfd.events = POLLIN; return poll (&pfd, 1, -1); #endif } #endif IO-AIO-4.18/AIO.pm0000644000000000000000000022553712035451261012044 0ustar rootroot=head1 NAME IO::AIO - Asynchronous Input/Output =head1 SYNOPSIS use IO::AIO; aio_open "/etc/passwd", IO::AIO::O_RDONLY, 0, sub { my $fh = shift or die "/etc/passwd: $!"; ... 
}; aio_unlink "/tmp/file", sub { }; aio_read $fh, 30000, 1024, $buffer, 0, sub { $_[0] > 0 or die "read error: $!"; }; # version 2+ has request and group objects use IO::AIO 2; aioreq_pri 4; # give next request a very high priority my $req = aio_unlink "/tmp/file", sub { }; $req->cancel; # cancel request if still in queue my $grp = aio_group sub { print "all stats done\n" }; add $grp aio_stat "..." for ...; =head1 DESCRIPTION This module implements asynchronous I/O using whatever means your operating system supports. It is implemented as an interface to C (L). Asynchronous means that operations that can normally block your program (e.g. reading from disk) will be done asynchronously: the operation will still block, but you can do something else in the meantime. This is extremely useful for programs that need to stay interactive even when doing heavy I/O (GUI programs, high performance network servers etc.), but can also be used to easily do operations in parallel that are normally done sequentially, e.g. stat'ing many files, which is much faster on a RAID volume or over NFS when you do a number of stat operations concurrently. While most of this works on all types of file descriptors (for example sockets), using these functions on file descriptors that support nonblocking operation (again, sockets, pipes etc.) is very inefficient. Use an event loop for that (such as the L module): IO::AIO will naturally fit into such an event loop itself. In this version, a number of threads are started that execute your requests and signal their completion. You don't need thread support in perl, and the threads created by this module will not be visible to perl. In the future, this module might make use of the native aio functions available on many operating systems. 
However, they are often not well-supported or restricted (GNU/Linux doesn't allow them on normal files currently, for example), and they would only support aio_read and aio_write, so the remaining functionality would have to be implemented using threads anyway. Although the module will work in the presence of other (Perl-) threads, it is currently not reentrant in any way, so use appropriate locking yourself, always call C<poll_cb> from within the same thread, or never call C<poll_cb> (or other C<aio_> functions) recursively. =head2 EXAMPLE This is a simple example that uses the EV module and loads F</etc/passwd> asynchronously: use Fcntl; use EV; use IO::AIO; # register the IO::AIO callback with EV my $aio_w = EV::io IO::AIO::poll_fileno, EV::READ, \&IO::AIO::poll_cb; # queue the request to open /etc/passwd aio_open "/etc/passwd", IO::AIO::O_RDONLY, 0, sub { my $fh = shift or die "error while opening: $!"; # stat'ing filehandles is generally non-blocking my $size = -s $fh; # queue a request to read the file my $contents; aio_read $fh, 0, $size, $contents, 0, sub { $_[0] == $size or die "short read: $!"; close $fh; # file contents now in $contents print $contents; # exit event loop and program EV::unloop; }; }; # possibly queue up other requests, or open GUI windows, # check for sockets etc. etc. # process events as long as there are some: EV::loop; =head1 REQUEST ANATOMY AND LIFETIME Every C<aio_*> function creates a request, which is a C data structure not directly visible to Perl. If called in non-void context, every request function returns a Perl object representing the request. In void context, nothing is returned, which saves a bit of memory. The perl object is a fairly standard ref-to-hash object. The hash contents are not used by IO::AIO so you are free to store anything you like in it. During their existence, aio requests travel through the following states, in order: =over 4 =item ready Immediately after a request is created it is put into the ready state, waiting for a thread to execute it. 
=item execute A thread has accepted the request for processing and is currently executing it (e.g. blocking in read). =item pending The request has been executed and is waiting for result processing. While request submission and execution is fully asynchronous, result processing is not and relies on the perl interpreter calling C (or another function with the same effect). =item result The request results are processed synchronously by C. The C function will process all outstanding aio requests by calling their callbacks, freeing memory associated with them and managing any groups they are contained in. =item done Request has reached the end of its lifetime and holds no resources anymore (except possibly for the Perl object, but its connection to the actual aio request is severed and calling its methods will either do nothing or result in a runtime error). =back =cut package IO::AIO; use Carp (); use common::sense; use base 'Exporter'; BEGIN { our $VERSION = '4.18'; our @AIO_REQ = qw(aio_sendfile aio_seek aio_read aio_write aio_open aio_close aio_stat aio_lstat aio_unlink aio_rmdir aio_readdir aio_readdirx aio_scandir aio_symlink aio_readlink aio_realpath aio_sync aio_fsync aio_syncfs aio_fdatasync aio_sync_file_range aio_allocate aio_pathsync aio_readahead aio_fiemap aio_rename aio_link aio_move aio_copy aio_group aio_nop aio_mknod aio_load aio_rmtree aio_mkdir aio_chown aio_chmod aio_utime aio_truncate aio_msync aio_mtouch aio_mlock aio_mlockall aio_statvfs aio_wd); our @EXPORT = (@AIO_REQ, qw(aioreq_pri aioreq_nice)); our @EXPORT_OK = qw(poll_fileno poll_cb poll_wait flush min_parallel max_parallel max_idle idle_timeout nreqs nready npending nthreads max_poll_time max_poll_reqs sendfile fadvise madvise mmap munmap munlock munlockall); push @AIO_REQ, qw(aio_busy); # not exported @IO::AIO::GRP::ISA = 'IO::AIO::REQ'; require XSLoader; XSLoader::load ("IO::AIO", $VERSION); } =head1 FUNCTIONS =head2 QUICK OVERVIEW This section simply lists the prototypes most of the 
functions for quick reference. See the following sections for function-by-function documentation. aio_wd $pathname, $callback->($wd) aio_open $pathname, $flags, $mode, $callback->($fh) aio_close $fh, $callback->($status) aio_seek $fh,$offset,$whence, $callback->($offs) aio_read $fh,$offset,$length, $data,$dataoffset, $callback->($retval) aio_write $fh,$offset,$length, $data,$dataoffset, $callback->($retval) aio_sendfile $out_fh, $in_fh, $in_offset, $length, $callback->($retval) aio_readahead $fh,$offset,$length, $callback->($retval) aio_stat $fh_or_path, $callback->($status) aio_lstat $fh, $callback->($status) aio_statvfs $fh_or_path, $callback->($statvfs) aio_utime $fh_or_path, $atime, $mtime, $callback->($status) aio_chown $fh_or_path, $uid, $gid, $callback->($status) aio_chmod $fh_or_path, $mode, $callback->($status) aio_truncate $fh_or_path, $offset, $callback->($status) aio_allocate $fh, $mode, $offset, $len, $callback->($status) aio_fiemap $fh, $start, $length, $flags, $count, $cb->(\@extents) aio_unlink $pathname, $callback->($status) aio_mknod $pathname, $mode, $dev, $callback->($status) aio_link $srcpath, $dstpath, $callback->($status) aio_symlink $srcpath, $dstpath, $callback->($status) aio_readlink $pathname, $callback->($link) aio_realpath $pathname, $callback->($link) aio_rename $srcpath, $dstpath, $callback->($status) aio_mkdir $pathname, $mode, $callback->($status) aio_rmdir $pathname, $callback->($status) aio_readdir $pathname, $callback->($entries) aio_readdirx $pathname, $flags, $callback->($entries, $flags) IO::AIO::READDIR_DENTS IO::AIO::READDIR_DIRS_FIRST IO::AIO::READDIR_STAT_ORDER IO::AIO::READDIR_FOUND_UNKNOWN aio_scandir $pathname, $maxreq, $callback->($dirs, $nondirs) aio_load $pathname, $data, $callback->($status) aio_copy $srcpath, $dstpath, $callback->($status) aio_move $srcpath, $dstpath, $callback->($status) aio_rmtree $pathname, $callback->($status) aio_sync $callback->($status) aio_syncfs $fh, $callback->($status) aio_fsync $fh, 
$callback->($status) aio_fdatasync $fh, $callback->($status) aio_sync_file_range $fh, $offset, $nbytes, $flags, $callback->($status) aio_pathsync $pathname, $callback->($status) aio_msync $scalar, $offset = 0, $length = undef, flags = 0, $callback->($status) aio_mtouch $scalar, $offset = 0, $length = undef, flags = 0, $callback->($status) aio_mlock $scalar, $offset = 0, $length = undef, $callback->($status) aio_mlockall $flags, $callback->($status) aio_group $callback->(...) aio_nop $callback->() $prev_pri = aioreq_pri [$pri] aioreq_nice $pri_adjust IO::AIO::poll_wait IO::AIO::poll_cb IO::AIO::poll IO::AIO::flush IO::AIO::max_poll_reqs $nreqs IO::AIO::max_poll_time $seconds IO::AIO::min_parallel $nthreads IO::AIO::max_parallel $nthreads IO::AIO::max_idle $nthreads IO::AIO::idle_timeout $seconds IO::AIO::max_outstanding $maxreqs IO::AIO::nreqs IO::AIO::nready IO::AIO::npending IO::AIO::sendfile $ofh, $ifh, $offset, $count IO::AIO::fadvise $fh, $offset, $len, $advice IO::AIO::mmap $scalar, $length, $prot, $flags[, $fh[, $offset]] IO::AIO::munmap $scalar IO::AIO::madvise $scalar, $offset, $length, $advice IO::AIO::mprotect $scalar, $offset, $length, $protect IO::AIO::munlock $scalar, $offset = 0, $length = undef IO::AIO::munlockall =head2 API NOTES All the C calls are more or less thin wrappers around the syscall with the same name (sans C). The arguments are similar or identical, and they all accept an additional (and optional) C<$callback> argument which must be a code reference. This code reference will be called after the syscall has been executed in an asynchronous fashion. The results of the request will be passed as arguments to the callback (and, if an error occured, in C<$!>) - for most requests the syscall return code (e.g. most syscalls return C<-1> on error, unlike perl, which usually delivers "false"). Some requests (such as C) pass the actual results and communicate failures by passing C. 
All functions expecting a filehandle keep a copy of the filehandle internally until the request has finished. All functions return request objects of type L that allow further manipulation of those requests while they are in-flight. The pathnames you pass to these routines I be absolute. The reason for this is that at the time the request is being executed, the current working directory could have changed. Alternatively, you can make sure that you never change the current working directory anywhere in the program and then use relative paths. You can also take advantage of IO::AIOs working directory abstraction, that lets you specify paths relative to some previously-opened "working directory object" - see the description of the C class later in this document. To encode pathnames as octets, either make sure you either: a) always pass in filenames you got from outside (command line, readdir etc.) without tinkering, b) are in your native filesystem encoding, c) use the Encode module and encode your pathnames to the locale (or other) encoding in effect in the user environment, d) use Glib::filename_from_unicode on unicode filenames or e) use something else to ensure your scalar has the correct contents. This works, btw. independent of the internal UTF-8 bit, which IO::AIO handles correctly whether it is set or not. =head2 AIO REQUEST FUNCTIONS =over 4 =item $prev_pri = aioreq_pri [$pri] Returns the priority value that would be used for the next request and, if C<$pri> is given, sets the priority for the next aio request. The default priority is C<0>, the minimum and maximum priorities are C<-4> and C<4>, respectively. Requests with higher priority will be serviced first. The priority will be reset to C<0> after each call to one of the C functions. 
Example: open a file with low priority, then read something from it with
higher priority so the read request is serviced before other low priority
open requests (potentially spamming the cache):

   aioreq_pri -3;
   aio_open ..., sub {
      return unless $_[0];

      aioreq_pri -2;
      aio_read $_[0], ..., sub {
         ...
      };
   };

=item aioreq_nice $pri_adjust

Similar to C<aioreq_pri>, but subtracts the given value from the current
priority, so the effect is cumulative.

=item aio_open $pathname, $flags, $mode, $callback->($fh)

Asynchronously open or create a file and call the callback with a newly
created filehandle for the file (or C<undef> in case of an error).

The pathname passed to C<aio_open> must be absolute. See API NOTES, above,
for an explanation.

The C<$flags> argument is a bitmask. See the C<Fcntl> module for a
list. They are the same as used by C<sysopen>.

Likewise, C<$mode> specifies the mode of the newly created file, if it
didn't exist and C<O_CREAT> has been given, just like perl's C<sysopen>,
except that it is mandatory (i.e. use C<0> if you don't create new files,
and C<0666> or C<0777> if you do). Note that the C<$mode> will be modified
by the umask in effect when the request is being executed, so better never
change the umask.

Example:

   aio_open "/etc/passwd", IO::AIO::O_RDONLY, 0, sub {
      if ($_[0]) {
         print "open successful, fh is $_[0]\n";
         ...
      } else {
         die "open failed: $!\n";
      }
   };

In addition to all the common open modes/flags (C<O_RDONLY>, C<O_WRONLY>,
C<O_RDWR>, C<O_CREAT>, C<O_TRUNC>, C<O_EXCL> and C<O_APPEND>), the
following POSIX and non-POSIX constants are available (missing ones on
your system are, as usual, C<0>):

C<O_ASYNC>, C<O_DIRECT>, C<O_NOATIME>, C<O_CLOEXEC>, C<O_NOCTTY>, C<O_NOFOLLOW>,
C<O_NONBLOCK>, C<O_EXEC>, C<O_SEARCH>, C<O_DIRECTORY>, C<O_DSYNC>,
C<O_RSYNC>, C<O_SYNC> and C<O_TTY_INIT>.

=item aio_close $fh, $callback->($status)

Asynchronously close a file and call the callback with the result
code.

Unfortunately, you can't do this to perl. Perl I<insists> very strongly on
closing the file descriptor associated with the filehandle itself.

Therefore, C<aio_close> will not close the filehandle - instead it will
use dup2 to overwrite the file descriptor with the write-end of a pipe
(the pipe fd will be created on demand and will be cached).
Or in other words: the file descriptor will be closed, but it will not be
free for reuse until the perl filehandle is closed.

=cut

=item aio_seek $fh, $offset, $whence, $callback->($offs)

Seeks the filehandle to the new C<$offset>, similarly to perl's
C<sysseek>. The C<$whence> can use the traditional values (C<0> for
C<IO::AIO::SEEK_SET>, C<1> for C<IO::AIO::SEEK_CUR> or C<2> for
C<IO::AIO::SEEK_END>).

The resulting absolute offset will be passed to the callback, or C<-1> in
case of an error.

In theory, the C<$whence> constants could be different than the
corresponding values from L<Fcntl>, but perl guarantees they are the same,
so don't panic.

As a GNU/Linux (and maybe Solaris) extension, also the constants
C<IO::AIO::SEEK_DATA> and C<IO::AIO::SEEK_HOLE> are available, if they
could be found. No guarantees about suitability for use in C<aio_seek> or
Perl's C<sysseek> can be made though, although I would naively assume they
"just work".

=item aio_read $fh,$offset,$length, $data,$dataoffset, $callback->($retval)

=item aio_write $fh,$offset,$length, $data,$dataoffset, $callback->($retval)

Reads or writes C<$length> bytes from or to the specified C<$fh> and
C<$offset> into the scalar given by C<$data> and offset C<$dataoffset>
and calls the callback with the actual number of bytes read (or -1 on
error, just like the syscall).

C<aio_read> will, like C<sysread>, shrink or grow the C<$data> scalar to
offset plus the actual number of bytes read.

If C<$offset> is undefined, then the current file descriptor offset will
be used (and updated), otherwise the file descriptor offset will not be
changed by these calls.

If C<$length> is undefined in C<aio_write>, use the remaining length of
C<$data>.

If C<$dataoffset> is less than zero, it will be counted from the end of
C<$data>.

The C<$data> scalar I<MUST NOT> be modified in any way while the request
is outstanding. Modifying it can result in segfaults or World War III (if
the necessary/optional hardware is installed).
Example: Read 15 bytes at offset 7 into scalar C<$buffer>, starting at offset C<0> within the scalar: aio_read $fh, 7, 15, $buffer, 0, sub { $_[0] > 0 or die "read error: $!"; print "read $_[0] bytes: <$buffer>\n"; }; =item aio_sendfile $out_fh, $in_fh, $in_offset, $length, $callback->($retval) Tries to copy C<$length> bytes from C<$in_fh> to C<$out_fh>. It starts reading at byte offset C<$in_offset>, and starts writing at the current file offset of C<$out_fh>. Because of that, it is not safe to issue more than one C per C<$out_fh>, as they will interfere with each other. The same C<$in_fh> works fine though, as this function does not move or use the file offset of C<$in_fh>. Please note that C can read more bytes from C<$in_fh> than are written, and there is no way to find out how many more bytes have been read from C alone, as C only provides the number of bytes written to C<$out_fh>. Only if the result value equals C<$length> one can assume that C<$length> bytes have been read. Unlike with other C functions, it makes a lot of sense to use C on non-blocking sockets, as long as one end (typically the C<$in_fh>) is a file - the file I/O will then be asynchronous, while the socket I/O will be non-blocking. Note, however, that you can run into a trap where C reads some data with readahead, then fails to write all data, and when the socket is ready the next time, the data in the cache is already lost, forcing C to again hit the disk. Explicit C + C let's you better control resource usage. This call tries to make use of a native C-like syscall to provide zero-copy operation. For this to work, C<$out_fh> should refer to a socket, and C<$in_fh> should refer to an mmap'able file. If a native sendfile cannot be found or it fails with C, C, C, C, C, C or C, it will be emulated, so you can call C on any type of filehandle regardless of the limitations of the operating system. 
As native sendfile syscalls (as practically any non-POSIX interface hacked
together in a hurry to improve benchmark numbers) tend to be rather buggy
on many systems, this implementation tries to work around some known bugs
in Linux and FreeBSD kernels (probably others, too), but that might fail,
so you really really should check the return value of C<aio_sendfile> -
fewer bytes than expected might have been transferred.

=item aio_readahead $fh,$offset,$length, $callback->($retval)

C<aio_readahead> populates the page cache with data from a file so that
subsequent reads from that file will not block on disk I/O. The C<$offset>
argument specifies the starting point from which data is to be read and
C<$length> specifies the number of bytes to be read. I/O is performed in
whole pages, so that offset is effectively rounded down to a page boundary
and bytes are read up to the next page boundary greater than or equal to
(offset+length). C<aio_readahead> does not read beyond the end of the
file. The current file offset of the file is left unchanged.

If that syscall doesn't exist (likely if your OS isn't Linux) it will be
emulated by simply reading the data, which would have a similar effect.

=item aio_stat $fh_or_path, $callback->($status)

=item aio_lstat $fh, $callback->($status)

Works like perl's C<stat> or C<lstat> in void context. The callback will
be called after the stat and the results will be available using C<stat _>
or C<-s _> etc...

The pathname passed to C<aio_stat> must be absolute. See API NOTES, above,
for an explanation.

Currently, the stats are always 64-bit-stats, i.e. instead of returning an
error when stat'ing a large file, the results will be silently truncated
unless perl itself is compiled with large file support.

To help interpret the mode and dev/rdev stat values, IO::AIO offers the
following constants and functions (if not implemented, the constants will
be C<0> and the functions will either C<croak> or fall back on traditional
behaviour).

C<S_IFMT>, C<S_IFIFO>, C<S_IFCHR>, C<S_IFBLK>, C<S_IFLNK>, C<S_IFREG>,
C<S_IFDIR>, C<S_IFWHT>, C<S_IFSOCK>, C<IO::AIO::major $dev_t>,
C<IO::AIO::minor $dev_t>, C<IO::AIO::makedev $major, $minor>.
Example: Print the length of F: aio_stat "/etc/passwd", sub { $_[0] and die "stat failed: $!"; print "size is ", -s _, "\n"; }; =item aio_statvfs $fh_or_path, $callback->($statvfs) Works like the POSIX C or C syscalls, depending on whether a file handle or path was passed. On success, the callback is passed a hash reference with the following members: C, C, C, C, C, C, C, C, C, C and C. On failure, C is passed. The following POSIX IO::AIO::ST_* constants are defined: C and C. The following non-POSIX IO::AIO::ST_* flag masks are defined to their correct value when available, or to C<0> on systems that do not support them: C, C, C, C, C, C, C, C, C and C. Example: stat C and dump out the data if successful. aio_statvfs "/wd", sub { my $f = $_[0] or die "statvfs: $!"; use Data::Dumper; say Dumper $f; }; # result: { bsize => 1024, bfree => 4333064312, blocks => 10253828096, files => 2050765568, flag => 4096, favail => 2042092649, bavail => 4333064312, ffree => 2042092649, namemax => 255, frsize => 1024, fsid => 1810 } Here is a (likely partial) list of fsid values used by Linux - it is safe to hardcode these when the $^O is C: 0x0000adf5 adfs 0x0000adff affs 0x5346414f afs 0x09041934 anon-inode filesystem 0x00000187 autofs 0x42465331 befs 0x1badface bfs 0x42494e4d binfmt_misc 0x9123683e btrfs 0x0027e0eb cgroupfs 0xff534d42 cifs 0x73757245 coda 0x012ff7b7 coh 0x28cd3d45 cramfs 0x453dcd28 cramfs-wend (wrong endianness) 0x64626720 debugfs 0x00001373 devfs 0x00001cd1 devpts 0x0000f15f ecryptfs 0x00414a53 efs 0x0000137d ext 0x0000ef53 ext2/ext3 0x0000ef51 ext2 0x00004006 fat 0x65735546 fuseblk 0x65735543 fusectl 0x0bad1dea futexfs 0x01161970 gfs2 0x47504653 gpfs 0x00004244 hfs 0xf995e849 hpfs 0x958458f6 hugetlbfs 0x2bad1dea inotifyfs 0x00009660 isofs 0x000072b6 jffs2 0x3153464a jfs 0x6b414653 k-afs 0x0bd00bd0 lustre 0x0000137f minix 0x0000138f minix 30 char names 0x00002468 minix v2 0x00002478 minix v2 30 char names 0x00004d5a minix v3 0x19800202 mqueue 0x00004d44 msdos 
   0x0000564c novell
   0x00006969 nfs
   0x6e667364 nfsd
   0x00003434 nilfs
   0x5346544e ntfs
   0x00009fa1 openprom
   0x7461636F ocfs2
   0x00009fa0 proc
   0x6165676c pstorefs
   0x0000002f qnx4
   0x858458f6 ramfs
   0x52654973 reiserfs
   0x00007275 romfs
   0x67596969 rpc_pipefs
   0x73636673 securityfs
   0xf97cff8c selinux
   0x0000517b smb
   0x534f434b sockfs
   0x73717368 squashfs
   0x62656572 sysfs
   0x012ff7b6 sysv2
   0x012ff7b5 sysv4
   0x01021994 tmpfs
   0x15013346 udf
   0x00011954 ufs
   0x54190100 ufs byteswapped
   0x00009fa2 usbdevfs
   0x01021997 v9fs
   0xa501fcf5 vxfs
   0xabba1974 xenfs
   0x012ff7b4 xenix
   0x58465342 xfs
   0x012fd16d xia

=item aio_utime $fh_or_path, $atime, $mtime, $callback->($status)

Works like perl's C<utime> function (including the special case of $atime
and $mtime being undef). Fractional times are supported if the underlying
syscalls support them.

When called with a pathname, uses utimes(2) if available, otherwise
utime(2). If called on a file descriptor, uses futimes(2) if available,
otherwise returns ENOSYS, so this is not portable.

Examples:

   # set atime and mtime to current time (basically touch(1)):
   aio_utime "path", undef, undef;
   # set atime to current time and mtime to beginning of the epoch:
   aio_utime "path", time, undef; # undef==0

=item aio_chown $fh_or_path, $uid, $gid, $callback->($status)

Works like perl's C<chown> function, except that C<undef> for either $uid
or $gid is being interpreted as "do not change" (but -1 can also be used).

Examples:

   # same as "chown root path" in the shell:
   aio_chown "path", 0, -1;
   # same as above:
   aio_chown "path", 0, undef;

=item aio_truncate $fh_or_path, $offset, $callback->($status)

Works like truncate(2) or ftruncate(2).

=item aio_allocate $fh, $mode, $offset, $len, $callback->($status)

Allocates or frees disk space according to the C<$mode> argument. See the
linux C<fallocate> documentation for details.

C<$mode> can currently be C<0> or C<IO::AIO::FALLOC_FL_KEEP_SIZE>
to allocate space, or C<IO::AIO::FALLOC_FL_PUNCH_HOLE |
IO::AIO::FALLOC_FL_KEEP_SIZE>, to deallocate a file range.

The file system block size used by C<fallocate> is presumably the
C<f_bsize> returned by C<statvfs>.
If C isn't available or cannot be emulated (currently no emulation will be attempted), passes C<-1> and sets C<$!> to C. =item aio_chmod $fh_or_path, $mode, $callback->($status) Works like perl's C function. =item aio_unlink $pathname, $callback->($status) Asynchronously unlink (delete) a file and call the callback with the result code. =item aio_mknod $pathname, $mode, $dev, $callback->($status) [EXPERIMENTAL] Asynchronously create a device node (or fifo). See mknod(2). The only (POSIX-) portable way of calling this function is: aio_mknod $pathname, IO::AIO::S_IFIFO | $mode, 0, sub { ... See C for info about some potentially helpful extra constants and functions. =item aio_link $srcpath, $dstpath, $callback->($status) Asynchronously create a new link to the existing object at C<$srcpath> at the path C<$dstpath> and call the callback with the result code. =item aio_symlink $srcpath, $dstpath, $callback->($status) Asynchronously create a new symbolic link to the existing object at C<$srcpath> at the path C<$dstpath> and call the callback with the result code. =item aio_readlink $pathname, $callback->($link) Asynchronously read the symlink specified by C<$path> and pass it to the callback. If an error occurs, nothing or undef gets passed to the callback. =item aio_realpath $pathname, $callback->($path) Asynchronously make the path absolute and resolve any symlinks in C<$path>. The resulting path only consists of directories (Same as L). This request can be used to get the absolute path of the current working directory by passing it a path of F<.> (a single dot). =item aio_rename $srcpath, $dstpath, $callback->($status) Asynchronously rename the object at C<$srcpath> to C<$dstpath>, just as rename(2) and call the callback with the result code. =item aio_mkdir $pathname, $mode, $callback->($status) Asynchronously mkdir (create) a directory and call the callback with the result code. 
C<$mode> will be modified by the umask at the time the request is
executed, so do not change your umask.

=item aio_rmdir $pathname, $callback->($status)

Asynchronously rmdir (delete) a directory and call the callback with the
result code.

=item aio_readdir $pathname, $callback->($entries)

Unlike the POSIX call of the same name, C<aio_readdir> reads an entire
directory (i.e. opendir + readdir + closedir). The entries will not be
sorted, and will B<not> include the C<.> and C<..> entries.

The callback is passed a single argument which is either C<undef> or an
array-ref with the filenames.

=item aio_readdirx $pathname, $flags, $callback->($entries, $flags)

Quite similar to C<aio_readdir>, but the C<$flags> argument allows one to
tune behaviour and output format. In case of an error, C<$entries> will be
C<undef>.

The flags are a combination of the following constants, ORed together (the
flags will also be passed to the callback, possibly modified):

=over 4

=item IO::AIO::READDIR_DENTS

When this flag is off, then the callback gets an arrayref consisting of
names only (as with C<aio_readdir>), otherwise it gets an arrayref with
C<[$name, $type, $inode]> arrayrefs, each describing a single directory
entry in more detail.

C<$name> is the name of the entry.

C<$type> is one of the C<IO::AIO::DT_xxx> constants:

C<IO::AIO::DT_UNKNOWN>, C<IO::AIO::DT_FIFO>, C<IO::AIO::DT_CHR>, C<IO::AIO::DT_DIR>,
C<IO::AIO::DT_BLK>, C<IO::AIO::DT_REG>, C<IO::AIO::DT_LNK>, C<IO::AIO::DT_SOCK>,
C<IO::AIO::DT_WHT>.

C<IO::AIO::DT_UNKNOWN> means just that: readdir does not know. If you need
to know, you have to run stat yourself. Also, for speed reasons, the
C<$type> scalars are read-only: you can not modify them.

C<$inode> is the inode number (which might not be exact on systems with 64
bit inode numbers and 32 bit perls). This field has unspecified content on
systems that do not deliver the inode information.

=item IO::AIO::READDIR_DIRS_FIRST

When this flag is set, then the names will be returned in an order where
likely directories come first, in optimal stat order. This is useful when
you need to quickly find directories, or you want to find all directories
while avoiding to stat() each entry.
If the system returns type information in readdir, then this is used
to find directories directly. Otherwise, likely directories are names
beginning with ".", or otherwise names with no dots, of which names with
short names are tried first.

=item IO::AIO::READDIR_STAT_ORDER

When this flag is set, then the names will be returned in an order
suitable for stat()'ing each one. That is, when you plan to stat()
all files in the given directory, then the returned order will likely
be fastest.

If both this flag and C<IO::AIO::READDIR_DIRS_FIRST> are specified, then
the likely dirs come first, resulting in a less optimal stat order.

=item IO::AIO::READDIR_FOUND_UNKNOWN

This flag should not be set when calling C<aio_readdirx>. Instead, it
is being set by C<aio_readdirx>, when any of the C<$type>'s found were
C<IO::AIO::DT_UNKNOWN>. The absence of this flag therefore indicates that all
C<$type>'s are known, which can be used to speed up some algorithms.

=back

=item aio_load $pathname, $data, $callback->($status)

This is a composite request that tries to fully load the given file into
memory. Status is the same as with aio_read.

=cut

sub aio_load($$;$) {
   my $pathname = $_[0];
   my $callback = $_[2];

   # reference to the caller's own scalar (not a copy), so aio_read fills
   # the variable the caller passed in
   my $bufref = \$_[1];

   # remember the priority chosen by the caller - every aio_* call resets
   # it, so it is re-applied before each sub request
   my $prio  = aioreq_pri;
   my $group = aio_group $callback;

   aioreq_pri $prio;
   add $group aio_open $pathname, O_RDONLY, 0, sub {
      my ($handle) = @_;

      # open failed: report -1, $! is left as set by the open request
      return $group->result (-1)
         unless $handle;

      # read the whole file (its current size) from offset 0 into the
      # start of the caller's scalar and pass on the read result
      aioreq_pri $prio;
      add $group aio_read $handle, 0, (-s $handle), $$bufref, 0, sub {
         $group->result ($_[0]);
      };
   };

   $group
}

=item aio_copy $srcpath, $dstpath, $callback->($status)

Try to copy the I<file> (directories not supported as either source or
destination) from C<$srcpath> to C<$dstpath> and call the callback with
a status of C<0> (ok) or C<-1> (error, see C<$!>).

This is a composite request that creates the destination file with
mode 0200 and copies the contents of the source file into it using
C<aio_sendfile>, followed by restoring atime, mtime, access mode and
uid/gid, in that order.
If an error occurs, the partial destination file will be unlinked, if
possible, except when setting atime, mtime, access mode and uid/gid, where
errors are being ignored.

=cut

sub aio_copy($$;$) {
   my ($src, $dst, $cb) = @_;

   # every aio_* call resets the request priority, so the caller's
   # priority is saved here and re-applied before each sub request
   my $pri = aioreq_pri;
   my $grp = aio_group $cb;

   aioreq_pri $pri;
   add $grp aio_open $src, O_RDONLY, 0, sub {
      if (my $src_fh = $_[0]) {
         # perl stat fields used below: 2 mode, 4 uid, 5 gid, 7 size,
         # 8 atime, 9 mtime
         my @stat = stat $src_fh; # hmm, might block over nfs?

         aioreq_pri $pri;
         add $grp aio_open $dst, O_CREAT | O_WRONLY | O_TRUNC, 0200, sub {
            if (my $dst_fh = $_[0]) {
               aioreq_pri $pri;
               add $grp aio_sendfile $dst_fh, $src_fh, 0, $stat[7], sub {
                  if ($_[0] == $stat[7]) {
                     # all bytes were copied - the copy counts as successful
                     # from here on, errors while restoring metadata are
                     # deliberately ignored
                     $grp->result (0);
                     close $src_fh;

                     # second stage: chmod, then chown, then close
                     my $ch = sub {
                        aioreq_pri $pri;
                        add $grp aio_chmod $dst_fh, $stat[2] & 07777, sub {
                           aioreq_pri $pri;
                           add $grp aio_chown $dst_fh, $stat[4], $stat[5], sub {
                              aioreq_pri $pri;
                              add $grp aio_close $dst_fh;
                           }
                        };
                     };

                     # first stage: restore atime/mtime via the handle, and
                     # retry by path if the handle variant fails with ENOSYS
                     aioreq_pri $pri;
                     add $grp aio_utime $dst_fh, $stat[8], $stat[9], sub {
                        if ($_[0] < 0 && $! == ENOSYS) {
                           aioreq_pri $pri;
                           add $grp aio_utime $dst, $stat[8], $stat[9], $ch;
                        } else {
                           $ch->();
                        }
                     };
                  } else {
                     # short copy or error: report failure and remove the
                     # partial destination file
                     $grp->result (-1);
                     close $src_fh;
                     close $dst_fh;

                     # this used to read "aioreq $pri;", which is not a
                     # function and would die before the unlink was queued
                     aioreq_pri $pri;
                     add $grp aio_unlink $dst;
                  }
               };
            } else {
               # destination could not be created
               $grp->result (-1);
            }
         };
      } else {
         # source could not be opened
         $grp->result (-1);
      }
   };

   $grp
}

=item aio_move $srcpath, $dstpath, $callback->($status)

Try to move the I<file> (directories not supported as either source or
destination) from C<$srcpath> to C<$dstpath> and call the callback with
a status of C<0> (ok) or C<-1> (error, see C<$!>).

This is a composite request that tries to rename(2) the file first; if
rename fails with C<EXDEV>, it copies the file with C<aio_copy> and, if
that is successful, unlinks the C<$srcpath>.

=cut

sub aio_move($$;$) {
   my ($src, $dst, $cb) = @_;

   # saved so it can be re-applied before each sub request
   my $pri = aioreq_pri;
   my $grp = aio_group $cb;

   aioreq_pri $pri;
   add $grp aio_rename $src, $dst, sub {
      if ($_[0] && $! == EXDEV) {
         # rename failed with EXDEV: fall back to copy + unlink
         aioreq_pri $pri;
         add $grp aio_copy $src, $dst, sub {
            $grp->result ($_[0]);

            # only remove the source when the copy reported success (0)
            unless ($_[0]) {
               aioreq_pri $pri;
               add $grp aio_unlink $src;
            }
         };
      } else {
         # rename succeeded, or failed for any other reason
         $grp->result ($_[0]);
      }
   };

   $grp
}

=item aio_scandir $pathname, $maxreq, $callback->($dirs, $nondirs)

Scans a directory (similar to C<aio_readdir>) but additionally tries to
efficiently separate the entries of directory C<$path> into two sets of
names, directories you can recurse into (directories), and ones you cannot
recurse into (everything else, including symlinks to directories).

C<aio_scandir> is a composite request that creates many sub requests.
C<$maxreq> specifies the maximum number of outstanding aio requests that
this function generates. If it is C<< <= 0 >>, then a suitable default
will be chosen (currently 4).

On error, the callback is called without arguments, otherwise it receives
two array-refs with path-relative entry names.

Example:

   aio_scandir $dir, 0, sub {
      my ($dirs, $nondirs) = @_;
      print "real directories: @$dirs\n";
      print "everything else: @$nondirs\n";
   };

Implementation notes.

The C<aio_readdir> cannot be avoided, but C<stat()>'ing every entry can.

If readdir returns file type information, then this is used directly to
find directories.

Otherwise, after reading the directory, the modification time, size etc.
of the directory before and after the readdir is checked, and if they
match (and isn't the current time), the link count will be used to decide
how many entries are directories (if >= 2). Otherwise, no knowledge of the
number of subdirectories will be assumed.

Then entries will be sorted into likely directories (names without a
non-initial dot currently) and likely non-directories (see
C<aio_readdirx>). Then every entry plus an appended C</.> will be
C<stat>'ed, likely directories first, in order of their inode numbers. If
that succeeds, it assumes that the entry is a directory or a symlink to
directory (which will be checked separately).
This is often faster than stat'ing the entry itself because filesystems
might detect the type of the entry without reading the inode data
(e.g. ext2fs filetype feature), even on systems that cannot return the
filetype information on readdir.

If the known number of directories (link count - 2) has been reached, the
rest of the entries is assumed to be non-directories.

This only works with certainty on POSIX (= UNIX) filesystems, which
fortunately are the vast majority of filesystems around.

It will also likely work on non-POSIX filesystems with reduced efficiency
as those tend to return 0 or 1 as link counts, which disables the
directory counting heuristic.

=cut

sub aio_scandir($$;$) {
   my ($path, $maxreq, $cb) = @_;

   # saved so it can be re-applied before each sub request
   my $pri = aioreq_pri;

   my $grp = aio_group $cb;

   $maxreq = 4 if $maxreq <= 0;

   # get a wd object
   aioreq_pri $pri;
   add $grp aio_wd $path, sub {
      $_[0]
         or return $grp->result ();

      # [wd, relative path] pair; the path element is rewritten below for
      # every entry that gets stat'ed
      my $wd = [shift, "."];

      # stat once
      aioreq_pri $pri;
      add $grp aio_stat $wd, sub {
         return $grp->result () if $_[0];
         my $now = time;
         # fingerprint of the directory: dev, ino, nlink, size, mtime
         my $hash1 = join ":", (stat _)[0,1,3,7,9];

         # read the directory entries
         aioreq_pri $pri;
         add $grp aio_readdirx $wd, READDIR_DIRS_FIRST, sub {
            my $entries = shift
               or return $grp->result ();

            # stat the dir another time
            aioreq_pri $pri;
            add $grp aio_stat $wd, sub {
               my $hash2 = join ":", (stat _)[0,1,3,7,9];

               # number of subdirectories still to be found, or negative
               # when the link count cannot be trusted
               my $ndirs;

               # take the slow route if anything looks fishy
               if ($hash1 ne $hash2 or (stat _)[9] == $now) {
                  $ndirs = -1;
               } else {
                  # if nlink == 2, we are finished
                  # for non-posix-fs's, we rely on nlink < 2
                  $ndirs = (stat _)[3] - 2
                     or return $grp->result ([], $entries);
               }

               my (@dirs, @nondirs);

               # sub group for the per-entry stats; its completion delivers
               # the final result
               my $statgrp = add $grp aio_group sub {
                  $grp->result (\@dirs, \@nondirs);
               };

               # keep at most $maxreq stat requests in flight
               limit $statgrp $maxreq;
               feed $statgrp sub {
                  return unless @$entries;
                  my $entry = shift @$entries;

                  # "$entry/." can only be stat'ed if $entry is a directory
                  # or a symlink to one
                  aioreq_pri $pri;
                  $wd->[1] = "$entry/.";
                  add $statgrp aio_stat $wd, sub {
                     if ($_[0] < 0) {
                        push @nondirs, $entry;
                     } else {
                        # need to check for real directory
                        aioreq_pri $pri;
                        $wd->[1] = $entry;
                        add $statgrp aio_lstat $wd, sub {
                           if (-d _) {
                              push @dirs, $entry;

                              # all directories accounted for: everything
                              # left is a non-directory, stop feeding
                              unless (--$ndirs) {
                                 push @nondirs, @$entries;
                                 feed $statgrp;
                              }
                           } else {
                              push @nondirs, $entry;
                           }
                        }
                     }
                  };
               };
            };
         };
      };
   };

   $grp
}

=item aio_rmtree $pathname, $callback->($status)

Delete a directory tree starting (and including) C<$path>, return the
status of the final C<rmdir> only. This is a composite request that
uses C<aio_scandir> to recurse into and rmdir directories, and unlink
everything else.

=cut

# forward declaration, so the recursive call inside the body can be written
# without parentheses
sub aio_rmtree;
sub aio_rmtree($;$) {
   my ($path, $cb) = @_;

   # saved so it can be re-applied before each sub request
   my $pri = aioreq_pri;
   my $grp = aio_group $cb;

   aioreq_pri $pri;
   add $grp aio_scandir $path, 0, sub {
      my ($dirs, $nondirs) = @_;

      # once everything inside $path has been processed, remove $path
      # itself and report the status of that rmdir
      my $dirgrp = aio_group sub {
         add $grp aio_rmdir $path, sub {
            $grp->result ($_[0]);
         };
      };

      (aioreq_pri $pri), add $dirgrp aio_rmtree "$path/$_" for @$dirs;
      (aioreq_pri $pri), add $dirgrp aio_unlink "$path/$_" for @$nondirs;

      add $grp $dirgrp;
   };

   $grp
}

=item aio_sync $callback->($status)

Asynchronously call sync and call the callback when finished.

=item aio_fsync $fh, $callback->($status)

Asynchronously call fsync on the given filehandle and call the callback
with the fsync result code.

=item aio_fdatasync $fh, $callback->($status)

Asynchronously call fdatasync on the given filehandle and call the
callback with the fdatasync result code.

If this call isn't available because your OS lacks it or it couldn't be
detected, it will be emulated by calling C<fsync> instead.

=item aio_syncfs $fh, $callback->($status)

Asynchronously call the syncfs syscall to sync the filesystem associated
to the given filehandle and call the callback with the syncfs result
code. If syncfs is not available, calls sync(), but returns C<-1> and sets
errno to C<ENOSYS> nevertheless.

=item aio_sync_file_range $fh, $offset, $nbytes, $flags, $callback->($status)

Sync the data portion of the file specified by C<$offset> and C<$length>
to disk (but NOT the metadata), by calling the Linux-specific
sync_file_range call.
If sync_file_range is not available or it returns ENOSYS, then fdatasync
or fsync is being substituted.

C<$flags> can be a combination of C<IO::AIO::SYNC_FILE_RANGE_WAIT_BEFORE>,
C<IO::AIO::SYNC_FILE_RANGE_WRITE> and
C<IO::AIO::SYNC_FILE_RANGE_WAIT_AFTER>: refer to the sync_file_range
manpage for details.

=item aio_pathsync $pathname, $callback->($status)

This request tries to open, fsync and close the given path. This is a
composite request intended to sync directories after directory operations
(E.g. rename). This might not work on all operating systems or have any
specific effect, but usually it makes sure that directory changes get
written to disc. It works for anything that can be opened for read-only,
not just directories.

Future versions of this function might fall back to other methods when
C<fsync> on the directory fails (such as calling C<sync>).

Passes C<0> when everything went ok, and C<-1> on error.

=cut

sub aio_pathsync($;$) {
   my $pathname = $_[0];
   my $callback = $_[1];

   # remember the caller's priority - every aio_* call resets it, so it is
   # re-applied before each sub request
   my $prio  = aioreq_pri;
   my $group = aio_group $callback;

   aioreq_pri $prio;
   add $group aio_open $pathname, O_RDONLY, 0, sub {
      my $handle = $_[0];

      # could not open the path: report failure
      unless ($handle) {
         $group->result (-1);
         return;
      }

      aioreq_pri $prio;
      add $group aio_fsync $handle, sub {
         # the fsync status is the overall result; the close that follows
         # is not checked
         $group->result ($_[0]);

         aioreq_pri $prio;
         add $group aio_close $handle;
      };
   };

   $group
}

=item aio_msync $scalar, $offset = 0, $length = undef, flags = 0, $callback->($status)

This is a rather advanced IO::AIO call, which only works on mmap(2)ed
scalars (see the C<IO::AIO::mmap> function, although it also works on data
scalars managed by the L<Sys::Mmap> or L<Mmap> modules, note that the
scalar must only be modified in-place while an aio operation is pending on
it).

It calls the C<msync> function of your OS, if available, with the memory
area starting at C<$offset> in the string and ending C<$length> bytes
later. If C<$length> is negative, counts from the end, and if C<$length>
is C<undef>, then it goes till the end of the string. The flags can be
a combination of C<IO::AIO::MS_ASYNC>, C<IO::AIO::MS_INVALIDATE> and
C<IO::AIO::MS_SYNC>.

=item aio_mtouch $scalar, $offset = 0, $length = undef, flags = 0, $callback->($status)

This is a rather advanced IO::AIO call, which works best on mmap(2)ed
scalars.
It touches (reads or writes) all memory pages in the specified
range inside the scalar. All caveats and parameters are the same
as for C<aio_msync>, above, except for flags, which must be either
C<0> (which reads all pages and ensures they are instantiated) or
C<IO::AIO::MT_MODIFY>, which modifies the memory pages (by reading and
writing an octet from it, which dirties the page).

=item aio_mlock $scalar, $offset = 0, $length = undef, $callback->($status)

This is a rather advanced IO::AIO call, which works best on mmap(2)ed
scalars.

It reads in all the pages of the underlying storage into memory (if any)
and locks them, so they are not getting swapped/paged out or removed.

If C<$length> is undefined, then the scalar will be locked till the end.

On systems that do not implement C<mlock>, this function returns C<-1>
and sets errno to C<ENOSYS>.

Note that the corresponding C<munlock> is synchronous and is
documented under L</MISCELLANEOUS FUNCTIONS>.

Example: open a file, mmap and mlock it - both will be undone when
C<$data> gets destroyed.

   open my $fh, "<", $path or die "$path: $!";
   my $data;
   IO::AIO::mmap $data, -s $fh, IO::AIO::PROT_READ, IO::AIO::MAP_SHARED, $fh;
   aio_mlock $data; # mlock in background

=item aio_mlockall $flags, $callback->($status)

Calls the C<mlockall> function with the given C<$flags> (a combination of
C<IO::AIO::MCL_CURRENT> and C<IO::AIO::MCL_FUTURE>).

On systems that do not implement C<mlockall>, this function returns C<-1>
and sets errno to C<ENOSYS>.

Note that the corresponding C<munlockall> is synchronous and is
documented under L</MISCELLANEOUS FUNCTIONS>.

Example: asynchronously lock all current and future pages into memory.

   aio_mlockall IO::AIO::MCL_FUTURE;

=item aio_fiemap $fh, $start, $length, $flags, $count, $cb->(\@extents)

Queries the extents of the given file (by calling the Linux C<FIEMAP>
ioctl, see L for details). If the ioctl is not available on your OS, then
this request will fail with C<ENOSYS>.

C<$start> is the starting offset to query extents for, C<$length> is the
size of the range to query - if it is C<undef>, then the whole file will
be queried.
C<$flags> is a combination of flags (C or C - C is also exported), and is normally C<0> or C to query the data portion. C<$count> is the maximum number of extent records to return. If it is C, then IO::AIO queries all extents of the range. As a very special case, if it is C<0>, then the callback receives the number of extents instead of the extents themselves (which is unreliable, see below). If an error occurs, the callback receives no arguments. The special C value C is available to test for flag errors. Otherwise, the callback receives an array reference with extent structures. Each extent structure is an array reference itself, with the following members: [$logical, $physical, $length, $flags] Flags is any combination of the following flag values (typically either C<0> or C (1)): C, C, C, C, C, C, C, C, C, C or C. At the time of this writing (Linux 3.2), this request is unreliable unless C<$count> is C, as the kernel has all sorts of bugs preventing it from returning all extents of a range for files with a large number of extents. The code works around all these issues if C<$count> is undef. =item aio_group $callback->(...) This is a very special aio request: Instead of doing something, it is a container for other aio requests, which is useful if you want to bundle many requests into a single, composite, request with a definite callback and the ability to cancel the whole request with its subrequests. Returns an object of class L. See its documentation below for more info. Example: my $grp = aio_group sub { print "all stats done\n"; }; add $grp (aio_stat ...), (aio_stat ...), ...; =item aio_nop $callback->() This is a special request - it does nothing in itself and is only used for side effects, such as when you want to add a dummy request to a group so that finishing the requests in the group depends on executing the given code. While this request does nothing, it still goes through the execution phase and still requires a worker thread.
Thus, the callback will not be executed immediately but only after other requests in the queue have entered their execution phase. This can be used to measure request latency. =item IO::AIO::aio_busy $fractional_seconds, $callback->() *NOT EXPORTED* Mainly used for debugging and benchmarking, this aio request puts one of the request workers to sleep for the given time. While it is theoretically handy to have simple I/O scheduling requests like sleep and file handle readable/writable, the overhead this creates is immense (it blocks a thread for a long time) so do not use this function except to put your application under artificial I/O pressure. =back =head2 IO::AIO::WD - multiple working directories Your process only has one current working directory, which is used by all threads. This makes it hard to use relative paths (some other component could call C at any time, and it is hard to control when the path will be used by IO::AIO). One solution for this is to always use absolute paths. This usually works, but can be quite slow (the kernel has to walk the whole path on every access), and can also be a hassle to implement. Newer POSIX systems have a number of functions (openat, fdopendir, futimensat and so on) that make it possible to specify working directories per operation. For portability, and because the clowns who "designed", or shall I write, perpetrated this new interface were obviously half-drunk, this abstraction cannot be perfect, though. IO::AIO allows you to convert directory paths into a so-called IO::AIO::WD object. This object stores the canonicalised, absolute version of the path, and on systems that allow it, also a directory file descriptor. Everywhere where a pathname is accepted by IO::AIO (e.g. in C or C), one can specify an array reference with an IO::AIO::WD object and a pathname instead (or the IO::AIO::WD object alone, which gets interpreted as C<[$wd, "."]>). 
If the pathname is absolute, the IO::AIO::WD object is ignored, otherwise the pathname is resolved relative to that IO::AIO::WD object. For example, to get a wd object for F and then stat F inside, you would write: aio_wd "/etc", sub { my $etcdir = shift; # although $etcdir can be undef on error, there is generally no reason # to check for errors here, as aio_stat will fail with ENOENT # when $etcdir is undef. aio_stat [$etcdir, "passwd"], sub { # yay }; }; That C is a request and not a normal function shows that creating an IO::AIO::WD object is itself a potentially blocking operation, which is why it is done asynchronously. To stat the directory obtained with C above, one could write any of the following three request calls: aio_lstat "/etc" , sub { ... # pathname as normal string aio_lstat [$wd, "."], sub { ... # "." relative to $wd (i.e. $wd itself) aio_lstat $wd , sub { ... # shorthand for the previous As with normal pathnames, IO::AIO keeps a copy of the working directory object and the pathname string, so you could write the following without causing any issues due to C<$path> getting reused: my $path = [$wd, undef]; for my $name (qw(abc def ghi)) { $path->[1] = $name; aio_stat $path, sub { # ... }; } There are some caveats: when directories get renamed (or deleted), the pathname string doesn't change, so will point to the new directory (or nowhere at all), while the directory fd, if available on the system, will still point to the original directory. Most functions accepting a pathname will use the directory fd on newer systems, and the string on older systems. Some functions (such as realpath) will always rely on the string form of the pathname. So this functionality is mainly useful to get some protection against C, to easily get an absolute path out of a relative path for future reference, and to speed up doing many operations in the same directory (e.g. when stat'ing all files in a directory).
The following functions implement this working directory abstraction: =over 4 =item aio_wd $pathname, $callback->($wd) Asynchronously canonicalise the given pathname and convert it to an IO::AIO::WD object representing it. If possible and supported on the system, also open a directory fd to speed up pathname resolution relative to this working directory. If something goes wrong, then C is passed to the callback instead of a working directory object and C<$!> is set appropriately. Since passing C as working directory component of a pathname fails the request with C, there is often no need for error checking in the C callback, as future requests using the value will fail in the expected way. If this call isn't available because your OS lacks it or it couldn't be detected, it will be emulated by calling C instead. =item IO::AIO::CWD This is a compile-time constant (object) that represents the process current working directory. Specifying this object as working directory object for a pathname is as if the pathname would be specified directly, without a directory object, e.g., these calls are functionally identical: aio_stat "somefile", sub { ... }; aio_stat [IO::AIO::CWD, "somefile"], sub { ... }; =back =head2 IO::AIO::REQ CLASS All non-aggregate C functions return an object of this class when called in non-void context. =over 4 =item cancel $req Cancels the request, if possible. Has the effect of skipping execution when entering the B state and skipping calling the callback when entering the B state, but will leave the request otherwise untouched (with the exception of readdir). That means that requests that currently execute will not be stopped and resources held by the request will not be freed prematurely. =item cb $req $callback->(...) Replace (or simply set) the callback registered to the request. =back =head2 IO::AIO::GRP CLASS This class is a subclass of L, so all its methods apply to objects of this class, too.
An IO::AIO::GRP object is a special request that can contain multiple other aio requests. You create one by calling the C constructing function with a callback that will be called when all contained requests have entered the C state: my $grp = aio_group sub { print "all requests are done\n"; }; You add requests by calling the C method with one or more C objects: $grp->add (aio_unlink "..."); add $grp aio_stat "...", sub { $_[0] or return $grp->result ("error"); # add another request dynamically, if first succeeded add $grp aio_open "...", sub { $grp->result ("ok"); }; }; This makes it very easy to create composite requests (see the source of C for an application) that work and feel like simple requests. =over 4 =item * The IO::AIO::GRP objects will be cleaned up during calls to C, just like any other request. =item * They can be canceled like any other request. Canceling will cancel not only the request itself, but also all requests it contains. =item * They can also be added to other IO::AIO::GRP objects. =item * You must not add requests to a group from within the group callback (or any later time). =back Their lifetime, simplified, looks like this: when they are empty, they will finish very quickly. If they contain only requests that are in the C state, they will also finish. Otherwise they will continue to exist. That means after creating a group you have some time to add requests (precisely before the callback has been invoked, which is only done within the C). And in the callbacks of those requests, you can add further requests to the group. And only when all those requests have finished will the group itself finish. =over 4 =item add $grp ... =item $grp->add (...) Add one or more requests to the group. Any type of L can be added, including other groups, as long as you do not create circular dependencies. Returns all its arguments. =item $grp->cancel_subs Cancel all subrequests and clears any feeder, but not the group request itself.
Useful when you queued a lot of events but got a result early. The group request will finish normally (you cannot add requests to the group). =item $grp->result (...) Set the result value(s) that will be passed to the group callback when all subrequests have finished and set the group's errno to the current value of errno (just like calling C without an error number). By default, no argument will be passed and errno is zero. =item $grp->errno ([$errno]) Sets the group errno value to C<$errno>, or the current value of errno when the argument is missing. Every aio request has an associated errno value that is restored when the callback is invoked. This method lets you change this value from its default (0). Calling C will also set errno, so make sure you either set C<$!> before the call to C, or call c after it. =item feed $grp $callback->($grp) Sets a feeder/generator on this group: every group can have an attached generator that generates requests if idle. The idea behind this is that, although you could just queue as many requests as you want in a group, this might starve other requests for a potentially long time. For example, C might generate hundreds of thousands of C requests, delaying any later requests for a long time. To avoid this, and allow incremental generation of requests, you can instead create a group and set a feeder on it that generates those requests. The feed callback will be called whenever there are few enough (see C, below) requests active in the group itself and is expected to queue more requests. The feed callback can queue as many requests as it likes (i.e. C does not impose any limits). If the feed does not queue more requests when called, it will be automatically removed from the group. If the feed limit is C<0> when this method is called, it will be set to C<2> automatically.
Example: # stat all files in @files, but only ever use four aio requests concurrently: my $grp = aio_group sub { print "finished\n" }; limit $grp 4; feed $grp sub { my $file = pop @files or return; add $grp aio_stat $file, sub { ... }; }; =item limit $grp $num Sets the feeder limit for the group: The feeder will be called whenever the group contains less than this many requests. Setting the limit to C<0> will pause the feeding process. The default value for the limit is C<0>, but note that setting a feeder automatically bumps it up to C<2>. =back =head2 SUPPORT FUNCTIONS =head3 EVENT PROCESSING AND EVENT LOOP INTEGRATION =over 4 =item $fileno = IO::AIO::poll_fileno Return the I. This filehandle must be polled for reading by some mechanism outside this module (e.g. EV, Glib, select and so on, see below or the SYNOPSIS). If the pipe becomes readable you have to call C to check the results. See C for an example. =item IO::AIO::poll_cb Process some outstanding events on the result pipe. You have to call this regularly. Returns C<0> if all events could be processed (or there were no events to process), or C<-1> if it returned earlier for whatever reason. Returns immediately when no events are outstanding. The amount of events processed depends on the settings of C and C. If not all requests were processed for whatever reason, the filehandle will still be ready when C returns, so normally you don't have to do anything special to have it called later. Apart from calling C when the event filehandle becomes ready, it can be beneficial to call this function from loops which submit a lot of requests, to make sure the results get processed when they become available and not just when the loop is finished and the event loop takes over again. This function returns very fast when there are no outstanding requests. 
Example: Install an Event watcher that automatically calls IO::AIO::poll_cb with high priority (more examples can be found in the SYNOPSIS section, at the top of this document): Event->io (fd => IO::AIO::poll_fileno, poll => 'r', async => 1, cb => \&IO::AIO::poll_cb); =item IO::AIO::poll_wait If there are any outstanding requests and none of them in the result phase, wait till the result filehandle becomes ready for reading (simply does a C