pax_global_header00006660000000000000000000000064143265621400014515gustar00rootroot0000000000000052 comment=04fcbf28312c54f77fbaab4e968d33147602a9f5 omp-1.0.0/000077500000000000000000000000001432656214000123065ustar00rootroot00000000000000omp-1.0.0/CMakeLists.txt000066400000000000000000000006321432656214000150470ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.5) project(omp) set(CMAKE_CXX_STANDARD 11) find_package(Threads) include_directories(include) add_library(omp src/omp.cpp include/omp.hpp) add_executable(omp-test main.cpp) target_link_libraries(omp-test omp ${CMAKE_THREAD_LIBS_INIT}) install(FILES include/omp.hpp DESTINATION include) install(TARGETS omp LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) omp-1.0.0/LICENSE000066400000000000000000000020731432656214000133150ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2016 Jonathon LeFaive Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. omp-1.0.0/README.md000066400000000000000000000011431432656214000135640ustar00rootroot00000000000000# OMP A parallel programming library that mimics OpenMP syntax. ## Example: omp parallel for ```c++ std::vector arr(256, 0.0); std::mutex named_section; omp::parallel_for(arr.begin(), arr.end(), [&named_section](double& element, std::size_t index) { element = (index + 1); omp::critical(named_section, []() { }); { std::lock_guard critical(named_section); // lock_guard is usually a better alternative to omp::critical. } omp::critical([]() { }); }); ``` ## Example: omp parallel ```c++ unsigned num_threads = 8; omp::parallel([]() { }, num_threads); ```omp-1.0.0/include/000077500000000000000000000000001432656214000137315ustar00rootroot00000000000000omp-1.0.0/include/omp.hpp000066400000000000000000000324071432656214000152430ustar00rootroot00000000000000 #ifndef OMP_OMP_HPP #define OMP_OMP_HPP #include #include #include #include #include #include #include #include #include #include namespace omp { struct iteration_context { std::size_t thread_index; std::size_t index; }; namespace internal { extern std::mutex global_mutex; extern const unsigned default_num_threads; inline std::uint64_t ceil_divide(std::uint64_t x, std::uint64_t y) { return (x + y - 1) / y; } class thread_pool { public: thread_pool(const std::function& fn, unsigned num_threads = 0) : num_threads_(num_threads ? num_threads : default_num_threads) { threads_.reserve(num_threads - 1); for (unsigned i = 0; i < (num_threads_ - 1); ++i) threads_.emplace_back(fn, i); fn(num_threads_ - 1); for (auto it = threads_.begin(); it != threads_.end(); ++it) it->join(); } private: const std::size_t num_threads_; std::vector threads_; }; class thread_pool2 { private: enum class state { shutdown = 0, run, running, sleep }; std::vector threads_; std::vector states_; std::mutex mtx_; std::condition_variable cv_; std::function fn_; std::size_t sleeping_counter_; public: thread_pool2(std::size_t num_threads = 0) : states_(num_threads ? num_threads - 1 : default_num_threads - 1, state::sleep), sleeping_counter_(states_.size()) { threads_.reserve(states_.size()); for (std::size_t i = 0; i < states_.size(); ++i) { threads_.emplace_back(std::bind(&thread_pool2::routine, this, i)); } } ~thread_pool2() { { std::unique_lock lk(mtx_); std::fill(states_.begin(), states_.end(), state::shutdown); } cv_.notify_all(); for (auto& t : threads_) t.join(); } std::size_t thread_count() const { return threads_.size() + 1; } void routine(std::size_t thread_idx) { while (true) { { std::unique_lock lk(mtx_); if (states_[thread_idx] == state::shutdown) break; if (states_[thread_idx] == state::running) { states_[thread_idx] = state::sleep; ++sleeping_counter_; cv_.notify_all(); } cv_.wait(lk, [this, thread_idx] { return states_[thread_idx] != state::sleep; }); if (states_[thread_idx] == state::shutdown) break; states_[thread_idx] = state::running; } if (fn_) { fn_(thread_idx); } } } //template void operator()(std::function&& fn) { fn_ = std::move(fn); { std::unique_lock lk(mtx_); std::fill(states_.begin(), states_.end(), state::run); sleeping_counter_ = 0; } cv_.notify_all(); if (fn_) { fn_(states_.size()); } { // Wait for child threads to complete. std::unique_lock lk(mtx_); cv_.wait(lk, [this] { return sleeping_counter_ == states_.size(); }); // std::count(states_.begin(), states_.end(), state::sleep) == states_.size(); }); } fn_ = nullptr; } }; template class dynamic_iterator_thread_pool { public: dynamic_iterator_thread_pool(std::size_t chunk_size, Iter begin, Iter end, const std::function& fn, unsigned num_threads) : fn_(fn), cur_(begin), end_(end), index_(0), chunk_size_(chunk_size ? chunk_size : 1), num_threads_(num_threads ? num_threads : default_num_threads) { threads_.reserve(num_threads_ - 1); for (unsigned i = 0; i < (num_threads_ - 1); ++i) threads_.emplace_back(std::bind(&dynamic_iterator_thread_pool::routine, this, i)); this->routine(num_threads_ - 1); for (auto it = threads_.begin(); it != threads_.end(); ++it) it->join(); } private: std::function fn_; std::vector threads_; Iter cur_; const Iter end_; std::size_t index_; std::mutex mtx_; const std::size_t chunk_size_; const std::size_t num_threads_; void routine(std::size_t thread_index) { bool done = false; while (!done) { std::vector chunk(chunk_size_); std::unique_lock lk(mtx_); std::size_t index = index_; for (std::size_t chunk_offset = 0; chunk_offset < chunk.size(); ++chunk_offset) { ++index_; chunk[chunk_offset] = cur_; if (cur_ != end_) ++cur_; } lk.unlock(); for (std::size_t chunk_offset = 0; chunk_offset < chunk.size(); ++chunk_offset) { if (chunk[chunk_offset] == end_) { done = true; } else { fn_(*chunk[chunk_offset], {thread_index, index + chunk_offset}); //fn_ ? fn_(*it, i) : void(); } } } } }; template class static_iterator_thread_pool { public: static_iterator_thread_pool(std::size_t chunk_size, Iter begin, Iter end, const std::function& fn, unsigned num_threads = 0) : fn_(fn), num_threads_(num_threads ? num_threads : default_num_threads), beg_(begin), end_(end), total_elements_(std::distance(beg_, end_)), chunk_size_(chunk_size ? chunk_size : static_cast(total_elements_) / num_threads_) { threads_.reserve(num_threads_ - 1); for (unsigned i = 0; i < (num_threads_ - 1); ++i) threads_.emplace_back(std::bind(&static_iterator_thread_pool::routine, this, i)); this->routine(num_threads_ - 1); for (auto it = threads_.begin(); it != threads_.end(); ++it) it->join(); } private: std::function fn_; const std::size_t num_threads_; std::vector threads_; const Iter beg_; const Iter end_; long total_elements_; const std::size_t chunk_size_; public: void routine(std::size_t thread_index) { auto cur = beg_; std::advance(cur, thread_index * chunk_size_); for (std::size_t index = (thread_index * chunk_size_); index < total_elements_; index += (chunk_size_ * num_threads_ - chunk_size_), std::advance(cur, chunk_size_ * num_threads_ - chunk_size_)) { for (std::size_t chunk_offset = 0; chunk_offset < chunk_size_ && index < total_elements_; ++chunk_offset) { assert(cur != end_); fn_(*cur, {thread_index,index}); //fn_ ? fn_(*it, i) : void(); ++cur; ++index; } } } }; template class static_iterator_functor { public: static_iterator_functor(std::size_t chunk_size, Iter begin, Iter end, const std::function& fn, unsigned num_threads) : fn_(fn), num_threads_(num_threads ? num_threads : default_num_threads), beg_(begin), end_(end), total_elements_(std::distance(beg_, end_)), chunk_size_(chunk_size ? chunk_size : ceil_divide(total_elements_, num_threads_)) { //assert(chunk_size_ > 0); // threads_.reserve(num_threads_ - 1); // for (unsigned i = 0; i < (num_threads_ - 1); ++i) // threads_.emplace_back(std::bind(&static_iterator_thread_pool::routine, this, i)); // this->routine(num_threads_ - 1); // // for (auto it = threads_.begin(); it != threads_.end(); ++it) // it->join(); } private: std::function fn_; const std::size_t num_threads_; const Iter beg_; const Iter end_; std::int64_t total_elements_; const std::int64_t chunk_size_; public: void operator()(std::size_t thread_index) { if (total_elements_ > 0) { auto cur = beg_; std::size_t index = (thread_index * chunk_size_); if (index >= total_elements_) return; std::advance(cur, thread_index * chunk_size_); for ( ; index < total_elements_; ) { std::size_t end_off = index + chunk_size_; for (; index < end_off && index < total_elements_; ++index,++cur) { assert(cur != end_); fn_(*cur, {thread_index, index}); //fn_ ? fn_(*it, i) : void(); } index += (chunk_size_ * num_threads_ - chunk_size_); if (index >= total_elements_) break; std::advance(cur, chunk_size_ * num_threads_ - chunk_size_); } } } }; } class sequence_iterator { public: typedef sequence_iterator self_type; typedef int difference_type; typedef int value_type; typedef value_type& reference; typedef value_type* pointer; typedef std::random_access_iterator_tag iterator_category; sequence_iterator() : val_(0) {} sequence_iterator(value_type val) : val_(val) { } //reference operator [] (difference_type); bool operator < (const self_type& other) { return val_ < other.val_; } bool operator > (const self_type& other) { return val_ > other.val_; } bool operator <= (const self_type& other) { return val_ <= other.val_; } bool operator >= (const self_type& other) { return val_ >= other.val_; } self_type operator++() { self_type ret = *this; ++val_; return ret; } self_type operator--() { self_type ret = *this; --val_; return ret; } self_type& operator += (difference_type i) { val_ += i; return *this; } self_type& operator -= (difference_type i) { val_ -= i; return *this; } self_type operator + (difference_type i) { return self_type(val_ + i); } self_type operator - (difference_type i) { return self_type(val_ - i); } difference_type operator - (const self_type& other) { return val_ - other.val_; } void operator++(int) { ++val_; } void operator--(int) { --val_; } reference operator*() { return val_; } pointer operator->() { return &val_; } bool operator==(const self_type& rhs) const { return (val_ == rhs.val_); } bool operator!=(const self_type& rhs) const { return (val_ != rhs.val_); } private: value_type val_; }; class schedule { public: schedule(std::size_t chunk_size); std::size_t chunk_size() const; protected: std::size_t chunk_size_; }; class dynamic_schedule : public schedule { public: dynamic_schedule(std::size_t chunk_size = 0); }; class static_schedule : public schedule { public: static_schedule(std::size_t chunk_size = 0); }; void parallel(const std::function& operation, unsigned thread_cnt = 0); template void parallel_for(const dynamic_schedule& sched, Iterator begin, Iterator end, const std::function& operation, unsigned thread_cnt = 0) { internal::dynamic_iterator_thread_pool pool(sched.chunk_size(), begin, end, operation, thread_cnt); } template void parallel_for(const static_schedule& sched, Iterator begin, Iterator end, const std::function& operation, unsigned thread_cnt = 0) { internal::static_iterator_thread_pool pool(sched.chunk_size(), begin, end, operation, thread_cnt); } template void parallel_for_exp(const static_schedule& sched, Iterator begin, Iterator end, const std::function& operation, internal::thread_pool2& tp) { tp(internal::static_iterator_functor(sched.chunk_size(), begin, end, operation, tp.thread_count())); //std::bind(&internal::static_iterator_functor::routine, &static_fn, std::placeholders::_1)); } template void parallel_for(Iterator begin, Iterator end, const std::function& operation, unsigned thread_cnt = 0) { parallel_for(static_schedule(), begin, end, operation, thread_cnt); } template void critical(std::mutex& mtx, Handler fn) { std::lock_guard lk(mtx); fn(); } template void critical(Handler fn) { std::lock_guard lk(internal::global_mutex); fn(); } } #endif //OMP_OMP_HPP omp-1.0.0/main.cpp000066400000000000000000000021711432656214000137370ustar00rootroot00000000000000 #include "omp.hpp" #include #include int main() { std::vector arr(257, 0.0); std::mutex named_section; std::size_t total = 0; omp::internal::thread_pool2 pool(8); omp::parallel_for_exp(omp::static_schedule(), arr.begin(), arr.end(), [&total, &named_section](double& element, const omp::iteration_context& ctx) { element = (ctx.index + 1); omp::critical(named_section, [&total, element, &ctx]() { ++total; }); { std::lock_guard critical(named_section); // lock_guard is usually a better alternative to omp::critical. } omp::critical([]() { }); }, pool); // 8); omp::parallel_for(omp::dynamic_schedule(), omp::sequence_iterator(-2), omp::sequence_iterator(5), [&total, &named_section](int& element, const omp::iteration_context& ctx) { std::lock_guard critical(named_section); ++total; }, 3); std::cout << total << std::endl; assert(total == 264); unsigned num_threads = 8; omp::parallel([](std::size_t thread_idx) { }, num_threads); return total == 264 ? EXIT_SUCCESS : EXIT_FAILURE; }omp-1.0.0/src/000077500000000000000000000000001432656214000130755ustar00rootroot00000000000000omp-1.0.0/src/omp.cpp000066400000000000000000000013221432656214000143720ustar00rootroot00000000000000 #include "omp.hpp" namespace omp { namespace internal { std::mutex global_mutex; const unsigned default_num_threads(std::thread::hardware_concurrency() ? std::thread::hardware_concurrency() : 4); } schedule::schedule(std::size_t chunk_size) : chunk_size_(chunk_size) { } std::size_t schedule::chunk_size() const { return chunk_size_; } dynamic_schedule::dynamic_schedule(std::size_t chunk_size) : schedule(chunk_size) { } static_schedule::static_schedule(std::size_t chunk_size) : schedule(chunk_size) { } void parallel(const std::function& operation, unsigned thread_cnt) { internal::thread_pool pool(operation, thread_cnt); } }