pax_global_header 0000666 0000000 0000000 00000000064 14477647226 0014535 g ustar 00root root 0000000 0000000 52 comment=89083927dc27dfc3e61154821cb0f30451af063a
cooler-0.9.3/ 0000775 0000000 0000000 00000000000 14477647226 0013031 5 ustar 00root root 0000000 0000000 cooler-0.9.3/.github/ 0000775 0000000 0000000 00000000000 14477647226 0014371 5 ustar 00root root 0000000 0000000 cooler-0.9.3/.github/codecov.yml 0000664 0000000 0000000 00000000345 14477647226 0016540 0 ustar 00root root 0000000 0000000 codecov:
notify:
require_ci_to_pass: yes
coverage:
precision: 2
round: down
range: 70..100
status:
project:
default:
target: 90%
threshold: 1%
patch: no
changes: no
comment: off
cooler-0.9.3/.github/dependabot.yml 0000664 0000000 0000000 00000000320 14477647226 0017214 0 ustar 00root root 0000000 0000000 ---
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "daily"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
cooler-0.9.3/.github/workflows/ 0000775 0000000 0000000 00000000000 14477647226 0016426 5 ustar 00root root 0000000 0000000 cooler-0.9.3/.github/workflows/ci.yml 0000664 0000000 0000000 00000002253 14477647226 0017546 0 ustar 00root root 0000000 0000000 name: CI
on:
push:
branches: [ master ]
tags:
- "v*" # Tag events matching v*, i.e. v1.0, v20.15.10
pull_request:
branches: [ master ]
jobs:
Lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.x"
- run: |
python -m pip install --upgrade hatch
pip install -e .[dev]
hatch run lint
Test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ "3.8", "3.9", "3.10", "3.11" ]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- run: |
python -m pip install --upgrade pip
pip install -e .[dev]
# stop the build if there are Python syntax errors or undefined names
ruff . --select=E9,F63,F7,F82
pytest
Release:
# Only run on v* tag events
if: startsWith(github.ref, 'refs/tags')
needs: [Lint, Test]
uses: open2c/cooler/.github/workflows/publish.yml@master
cooler-0.9.3/.github/workflows/publish.yml 0000664 0000000 0000000 00000001262 14477647226 0020620 0 ustar 00root root 0000000 0000000 name: Publish Python Package to PyPI
on:
workflow_call:
workflow_dispatch:
jobs:
Publish:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: "3.x"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install build
- name: Build
run: python -m build
- name: Publish distribution 📦 to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: ${{ secrets.PYPI_USERNAME }}
password: ${{ secrets.PYPI_PASSWORD }}
cooler-0.9.3/.gitignore 0000664 0000000 0000000 00000000454 14477647226 0015024 0 ustar 00root root 0000000 0000000 *.swp
*.swo
*~
*.py[cod]
__pycache__
# test and coverage artifacts
.cache
.pytest_cache
.coverage
.coverage.*
coverage.xml
htmlcov/
# setup and build artifacts
docs/_*
*.egg-info/
dist/
build/
MANIFEST
# OS-generated files
.DS_Store
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
tmp/
_scratch/
cooler-0.9.3/.pre-commit-config.yaml 0000664 0000000 0000000 00000001075 14477647226 0017315 0 ustar 00root root 0000000 0000000 exclude: '^scripts'
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: mixed-line-ending
- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
rev: v3.10.1
hooks:
- id: pyupgrade
args:
- --py36-plus
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.287
hooks:
- id: ruff
args: ["--exit-zero", "src/cooler"]
cooler-0.9.3/.readthedocs.yml 0000664 0000000 0000000 00000001127 14477647226 0016120 0 ustar 00root root 0000000 0000000 # .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/conf.py
# Build documentation with MkDocs
#mkdocs:
# configuration: mkdocs.yml
# Optionally build your docs in additional formats such as PDF and ePub
formats: all
# Optionally set the version of Python and requirements required to build your docs
python:
install:
- method: pip
path: .
extra_requirements:
- dev
- docs
cooler-0.9.3/CHANGES.md 0000664 0000000 0000000 00000043746 14477647226 0014441 0 ustar 00root root 0000000 0000000 # Release notes #
## [v0.9.3](https://github.com/open2c/cooler/compare/v0.9.2...v0.9.3)
### Bug fixes
* Fix estimation of mean bin size when using variable-sized bins #332.
* Fix regression bug to handle multiple convergence statuses in balance CLI #334.
## [v0.9.2](https://github.com/open2c/cooler/compare/v0.9.1...v0.9.2)
### Bug fixes
Several fixes from @robomics
* Improve handling of uint attributes
* Fix incorrect variance stored by _balance_cisonly
* cload.py: fix detection of pandas v2.*.* causing IOHandles error
## [v0.9.1](https://github.com/open2c/cooler/compare/v0.9.0...v0.9.1)
Date 2023-01-23
### Maintenance
* Export `_IndexingMixin` from `cooler.core` to keep private API used by cooltools.
## [v0.9.0](https://github.com/open2c/cooler/compare/v0.8.11...v0.9.0)
Date 2023-01-18
### New features
* New query engine implementation (no user-facing API changes).
* New logging context and verbosity system.
### API changes
* `cooler.balance_cooler` now uses the same default argument values as the CLI.
### Schema
* `cooler balance` now inserts `divisive_weights: False` metadata to balancing weights.
### Maintenance
* Dropped Python 2 support.
* Removed deprecated `io` module.
* Deprecated `tools` module; renamed `parallel`.
* Various dependency maintenance upgrades.
* Modernized CI and dev tools.
* Migrated to pyproject.toml and modernized packaging infra.
## [v0.8.11](https://github.com/open2c/cooler/compare/v0.8.10...v0.8.11)
Date : 2021-04-01
### Bug fixes
* Hotfix `cooler cload pairs` file header parsing to work with the latest version of pandas (>=1.2).
* Update CLI help for `--balance-args` to clarify usage. By @gfudenberg.
## [v0.8.10](https://github.com/open2c/cooler/compare/v0.8.9...v0.8.10)
Date : 2020-09-25
### Bug fixes
* Fixed the new header parsing in `cooler cload pairs` to handle esoteric file stream implementations. Specifically `GzipFile` had stopped working. By @golobor
## [v0.8.9](https://github.com/open2c/cooler/compare/v0.8.8...v0.8.9)
Date : 2020-07-17
### Enhancements
* Added single-cell cooler file flavor (.scool) (#201)
## [v0.8.8](https://github.com/open2c/cooler/compare/v0.8.7...v0.8.8)
Date : 2020-06-23
### Maintenance
* Improved code coverage
* Added missing autodoc for cooler balance
* Dropped pysam and biopython as hard dependencies
* Officially sunsetting Python 2.7 support
### Enhancements
* Added zoom progressions (#203)
### Bug fixes
* Allow hashes in read IDs in cload pairs (#193)
## [v0.8.7](https://github.com/open2c/cooler/compare/v0.8.6...v0.8.7)
Date: 2020-01-12
### Maintenance
* Code styling with black
* Add coverage reporting
### Bug fixes
* Replace `json` with `simplejson` to deal with attrs stored as bytes
## [v0.8.6](https://github.com/open2c/cooler/compare/v0.8.5...v0.8.6)
Date: 2019-08-12
### Maintenance
* Added contributing guidelines
### Bug fixes
* Fixed a related regression that affected selection of the `chrom` column.
Post-release `v0.8.6.post0`: requirements files added to MANIFEST.in
## [v0.8.5](https://github.com/open2c/cooler/compare/v0.8.4...v0.8.5)
Date: 2019-04-08
### Bug fixes
* Fixed a regression that prevented selection of bins excluding the `chrom` column.
## [v0.8.4](https://github.com/open2c/cooler/compare/v0.8.3...v0.8.4)
Date: 2019-04-04
### Enhancements
* When creating coolers from unordered input, change the default temporary dir to be the same as the output file instead of the system tmp (pass '-' to use the system one). #150
* `cooler ls` and `list_coolers()` now output paths in natural order. #153
* New option in `cooler.matrix()` to handle divisive balancing weight vectors.
### Bug fixes
* Restore function of `--count-as-float` option to `cooler load`
* Fixed partitioning issue sometimes causing some bins to get split during coarsen
* `rename_chroms()` will refresh cached chromosome names #147
* `Cooler.bins()` selector will always properly convert bins/chrom integer IDs to categorical chromosome names when the number of contigs is very large and therefore the HDF5 ENUM header is missing. Before this would only happen when explicitly requesting `convert_enum=True`.
## [v0.8.3](https://github.com/open2c/cooler/compare/v0.8.2...v0.8.3)
Date: 2019-02-11
### Bug fixes
* Fixed import bug in `rename_chroms`
* `create_cooler` no longer requires a "count" column when specifying custom value columns
## [v0.8.2](https://github.com/open2c/cooler/compare/v0.8.1...v0.8.2)
Date: 2019-01-20
### Enhancements
New options for `cooler dump` pixel output:
* `--matrix` option: Applies to symmetric-upper coolers; no-op for square coolers. Generates all lower triangular pixels necessary to fill the requested genomic query window. Without this option, `cooler dump` will only return the data explicity stored in the pixel table (i.e. upper triangle).
* `-one-based-ids` and `--one-based-starts` convenience options.
### Bug fixes
* A bug was introduced into the matrix-as-pixels selector in 0.8.0 that also affected `cooler dump`. The behavior has been restored to that in 0.7.
## [v0.8.1](https://github.com/open2c/cooler/compare/v0.8.0...v0.8.1)
Date: 2019-01-02
### Enhancements
* `cooler zoomify` command can take additional base resolutions as input.
### Bug fixes
* Fixed regression that slowed down pre-processing during coarsen.
* Fixed missing import on handling bad URIs.
* Restore but deprecate `cooler.io.ls` for backwards compatibility.
## [v0.8.0](https://github.com/open2c/cooler/compare/v0.7.11...v0.8.0)
Date: 2018-12-31
This is a major release from 0.7 and includes an updated format version, and several API changes and deprecations.
### Schema
* New schema version: v3
* Adds required `storage-mode` metadata attribute. Two possible values: `"symmetric-upper"` indicates a symmetric matrix encoded as upper triangle (previously the only storage mode); `"square"` indicates no special encoding (e.g. for non-symmetric matrices).
### New features
* Support for **non-symmetric** matrices, e.g. RNA-DNA maps.
* Create function accepts a boolean `symmetric_upper` option to set the storage mode. Default is `True`.
* Creation commands also use `symmetric_upper` by default, which can be overridden with a flag.
* All main functionality exposed through top-level functions (create, merge, coarsen, zoomify, balance)
* New commands for generic file operations and file inspection.
### API changes
* `cooler.annotate()` option `replace` now defaults to `False`.
* Submodule renaming. Old names are preserved as aliases but are deprecated.
* `cooler.io` -> `cooler.create`.
* `cooler.ice` -> `cooler.balance`.
* New top level public functions:
* `cooler.create_cooler()`. Use instead of `cooler.io.create` and `cooler.io.create_from_unordered`.
* `cooler.merge_coolers()`
* `cooler.coarsen_cooler()`
* `cooler.zoomify_cooler()`
* `cooler.balance_cooler()`. Alias: `cooler.balance.iterative_correction()`.
* Refactored file operations available in `cooler.fileops`. See the API reference.
### CLI changes
* Various output options added to `cooler info`, `cooler dump`, `cooler makebins` and `cooler digest`.
* Generic data and attribute hierarchy viewers `cooler tree` and `cooler attrs`.
* Generic `cp`, `mv` and `ln` convenience commands.
* New verbosity and process info options.
### Maintenance
* Unit tests refactored and re-written for pytest.
## [v0.7.11](https://github.com/open2c/cooler/compare/v0.7.10...v0.7.11)
Date: 2018-08-17
* Genomic range parser supports humanized units (k/K(b), m/M(b), g/G(b))
* Experimental support for arbitrary aggregation operations in `cooler csort` (e.g. mean, median, max, min)
* Documentation updates
Bug fixes
* Fix newline handling for csort when p1 or p2 is last column.
* Fix `--count-as-float` regression in load/cload.
## [v0.7.10](https://github.com/open2c/cooler/compare/v0.7.9...v0.7.10)
Date: 2018-05-07
* Fix a shallow copy bug in validate pixels causing records to sometimes flip twice.
* Add ignore distance (bp) filter to cooler balance
* Start using shuffle filter by default
## [v0.7.9](https://github.com/open2c/cooler/compare/v0.7.8...v0.7.9)
Date: 2018-03-30
* Indexed pairs loading commands now provide option for 0- or 1-based positions (1-based by default). #115
* Fixed error introduced into cload pairix in last release.
## [v0.7.8](https://github.com/open2c/cooler/compare/v0.7.7...v0.7.8)
Date: 2018-03-18
### Enhancements
* New `cooler cload pairs` command provides index-free loading of pairs.
* Changed name of `create_from_unsorted` to more correct `create_from_unordered`.
### Bug fixes
* Fixed broken use of single-file temporary store in `create_from_unordered`.
* Added heuristic in pairix cload to prevent excessively large chunks. #92
* Added extra checks in `cload pairix` and `cload tabix`. #62, #75
## [v0.7.7](https://github.com/open2c/cooler/compare/v0.7.6...v0.7.7)
Date: 2018-03-16
### Enhancements
* Implementation of unsorted (index-free) loading
* `cooler.io.create_from_unsorted` takes an iterable of pixel dataframe chunks that need not be properly sorted.
* Use input sanitization procedures for pairs `sanitize_records` and binned data `sanitize_pixels` to feed data to `create_from_unsorted`. #87 #108 #109
* The `cooler load` command is now index-free: unsorted `COO` and `BG2` input data can be streamed in. #90. This will soon be implemented as an option for loading pairs as well.
* Prevent `cooler balance` command from exiting with non-zero status upon failed convergence using convergence error policies. #93
* Improve the `create` API to support pandas read_csv-style `columns` and `dtype` kwargs to add extra value columns or override default dtypes. #108
* Experimental implementation of trans-only balancing. #56
### Bug fixes
* Fix argmax deprecation. #99
## [v0.7.6](https://github.com/open2c/cooler/compare/v0.7.5...v0.7.6)
Date: 2017-10-31
### Enhancements
* Cooler zoomify with explicit resolutions
* Towards standardization of multicooler structure
* Support for loading 1-based COO triplet input files
### Bug fixes
* Fixed issue of exceeding header limit with too many scaffolds. If header size is exceeded, chrom IDs are stored as raw integers instead of HDF5 enums. There should be no effect at the API level.
* Fixed issue of single-column chromosomes files not working in `cload`.
* Fixed edge case in performing joins when using both `as_pixels` and `join` options in the matrix selector.
Happy Halloween!
## [v0.7.5](https://github.com/open2c/cooler/compare/v0.7.4...v0.7.5)
Date: 2017-07-13
* Fix pandas issue affecting cases when loading single chromosomes
* Add transform options to higlass API
## [v0.7.4](https://github.com/open2c/cooler/compare/v0.7.3...v0.7.4)
Date: 2017-05-25
* Fix regression in automatic --balance option in cooler zoomify
* Fix special cases where cooler.io.create and append would not work with certain inputs
## [v0.7.3](https://github.com/open2c/cooler/compare/v0.7.2...v0.7.3)
Date: 2017-05-22
* Added function to print higlass zoom resolutions for a given genome and base resolution.
## [v0.7.2](https://github.com/open2c/cooler/compare/v0.7.1...v0.7.2)
Date: 2017-05-09
* Improve chunking and fix pickling issue with aggregating very large text datasets
* Restore zoom binsize metadata to higlass files
## [v0.7.1](https://github.com/open2c/cooler/compare/v0.7.0...v0.7.1)
Date: 2017-04-29
* `cooler load` command can now accept supplemental pixel fields and custom field numbers
* Fix parsing errors with unused pixel fields
* Eliminate hard dependence on dask to make pip installs simpler. Conda package will retain dask as a run time requirement.
## [v0.7.0](https://github.com/open2c/cooler/compare/v0.6.6...v0.7.0)
Date: 2017-04-27
### New features
* New Cooler URIs: Full support for Cooler objects anywhere in the data hierarchy of a .cool file
* Experimental dask support via `cooler.contrib.dask`
* New explicit bin blacklist option for `cooler balance`
* Various new CLI tools:
* `cooler list`
* `cooler copy`
* `cooler merge`
* `cooler csort` now produces Pairix files by default
* `cooler load` now accepts two types of matrix text input formats
* 3-column sparse matrix
* 7-column bg2.gz (2D bedGraph) indexed with Pairix (e.g. using csort)
* `cooler coarsegrain` renamed `cooler coarsen`
* Multi-resolution HiGlass input files can now be generated with the `cooler zoomify` command
* More flexible API functions to create and append columns to Coolers in `cooler.io`
#### API/CLI changes
* `cooler.io.create` signature changed; `chromsizes` argument is deprecated.
* `cooler csort` argument order changed
### Bug fixes
* Chromosome name length restriction removed
* `Cooler.open` function now correctly opens the specific root group of the Cooler and behaves like a proper context manager in all cases
## [v0.6.6](https://github.com/open2c/cooler/compare/v0.6.5...v0.6.6)
Date: 2017-03-21
* Chromosome names longer than 32 chars are forbidden for now
* Improved pairix and tabix iterators, dropped need for slow first pass over contacts
## [v0.6.5](https://github.com/open2c/cooler/compare/v0.6.4...v0.6.5)
Date: 2017-03-18
* Fixed pairix aggregator to properly deal with autoflipping of pairs
## [v0.6.4](https://github.com/open2c/cooler/compare/v0.6.3...v0.6.4)
Date: 2017-03-17
* Migrated higlass multires aggregator to `cooler coarsegrain` command
* Fixed pairix aggregator to properly deal with autoflipping of pairs
## [v0.6.3](https://github.com/open2c/cooler/compare/v0.6.2...v0.6.3)
Date: 2017-02-22
* Merge PairixAggregator patch from Soo.
* Update repr string
* Return matrix scale factor in balance stats rather than the bias scale factor: #35.
## [v0.6.2](https://github.com/open2c/cooler/compare/v0.6.1...v0.6.2)
Date: 2017-02-12
Fixed regressions in
* cooler cload tabix/pairix failed on non-fixed sized bins
* cooler show
## [v0.6.1](https://github.com/open2c/cooler/compare/v0.6.0...v0.6.1)
Date: 2017-02-06
* This fixes stale build used in bdist_wheel packaging that broke 0.6.0. #29
## [v0.6.0](https://github.com/open2c/cooler/compare/v0.5.3...v0.6.0)
Date: 2017-02-03
### Enhancements
* Dropped Python 3.3 support. Added 3.6 support.
* Added `contrib` subpackage containing utilities for higlass, including multires aggregation.
* Fixed various issues with synchronizing read/write multiprocessing with HDF5.
* Replacing prints with logging.
* Added sandboxed `tools` module to develop utilities for out-of-core algorithms using Coolers.
### New features
* Cooler objects have additional convenience properties `chromsizes`, `chromnames`.
* New file introspection functions `ls` and `is_cooler` to support nested Cooler groups.
* Cooler initializer can accept a file path and path to Cooler group.
* `cload` accepts contact lists in hiclib-style HDF5 format, the legacy tabix-indexed format, and new pairix-indexed format.
### API/CLI changes
* `create` only accepts a file path and optional group path instead of an open file object.
* `Cooler.matrix` selector now returns a balanced dense 2D NumPy array by default. Explicitly set `balance` to False to get raw counts and set `sparse` to True to get a `coo_matrix` as per old behavior.
* Command line parameters of `cload` changed significantly
### Bug fixes
* Fixed bug in `csort` that led to incorrect triangularity of trans read pairs.
## [v0.5.3](https://github.com/open2c/cooler/compare/v0.5.2...v0.5.3)
Date: 2016-09-10
* Check for existence of required external tools in CLI
* Fixed `cooler show` incompatibility with older versions of matplotlib
* Fixed `cooler.annotate` to work on empty dataframe input
* Fixed broken pipe signals not getting suppressed on Python 2
* `cooler cload` raises a warning when bin file lists a contig missing from the contact list
## [v0.5.2](https://github.com/open2c/cooler/compare/v0.5.1...v0.5.2)
Date: 2016-08-26
* Fix bug in `cooler csort` parsing of chromsizes file.
* Workaround for two locale-related issues on Python 3. Only affects cases where a machine's locale is set to ASCII or Unices which use the ambiguous C or POSIX locales.
* Fix typo in setup.py and add pysam to dependencies.
## [v0.5.1](https://github.com/open2c/cooler/compare/v0.5.0...v0.5.1)
Date: 2016-08-24
* Bug fix in input parser to `cooler csort`
* Update triu reording awk template in `cooler csort`
* Rename `cooler binnify` to `cooler makebins`. Binnify sounds like "aggregate" which is what `cload` does.
## [v0.5.0](https://github.com/open2c/cooler/compare/v0.4.0...v0.5.0)
Date: 2016-08-24
* Most scripts ported over to a new command line interface using the Click framework with many updates.
* New `show` and `info` scripts.
* Updated Readme.
* Minor bug fixes.
## [v0.4.0](https://github.com/open2c/cooler/compare/v0.3.0...v0.4.0)
Date: 2016-08-18
### Schema
* Updated file schema: v2
* `/bins/chroms` is now an enum instead of string column
### API changes
* Table views are a bit more intuitive: selecting field names on table view objects returns a new view on the subset of columns.
* New API function: `cooler.annotate` for doing joins
### New Features
* Support for nested Cooler "trees" at any depth in an HDF5 hierarchy
* Refactored `cooler.io` to provide "contact readers" that process different kinds of input (aggregate from a contact list, load from an existing matrix, etc.)
* Added new scripts for contact aggregation, loading, dumping and balancing
## [v0.3.0](https://github.com/open2c/cooler/compare/v0.2.1...v0.3.0)
Date: 2016-02-18
* 2D range selector `matrix()` now provides either rectangular data as coo_matrix or triangular data as a pixel table dataframe.
* Added binning support for any genome segmentation (i.e., fixed or variable bin width).
* Fixed issues with binning data from mapped read files.
* Genomic locus string parser now accepts ENSEMBL-style number-only chromosome names and FASTA-style sequence names containing pipes.
## [v0.2.1](https://github.com/open2c/cooler/compare/v0.2...v0.2.1)
Date: 2016-02-07
* Fixed bintable region fetcher
## [v0.2](https://github.com/open2c/cooler/compare/v0.1...v0.2)
Date: 2016-01-17
* First beta release
## [v0.1](https://github.com/open2c/cooler/releases/tag/v0.1)
Date: 2015-11-22
* Working initial prototype.
cooler-0.9.3/CITATION.cff 0000664 0000000 0000000 00000004275 14477647226 0014733 0 ustar 00root root 0000000 0000000 cff-version: 1.2.0
type: software
title: cooler
message: >-
Please cite this software using the metadata from "preferred-citation".
abstract: >-
Cooler is a Python support library for .cool files: an efficient storage format
for high resolution genomic interaction matrices. The cooler package aims to
provide the following functionality:
* Build contact matrices at any genomic resolution.
* Query contact matrices.
* Export and visualize the data.
* Perform scalable out-of-core operations on the data.
* Provide a clean and well-documented Python API to interact with the data.
Follow cooler development on GitHub.
license: "BSD-3-Clause"
doi: "10.5281/zenodo.597976"
url: "https://open2c.github.io/cooler"
repository-code: "https://github.com/open2c/cooler"
keywords:
- bioinformatics
- genomics
- Hi-C
- sparse
- matrix
- format
- Python
- out-of-core
authors:
- given-names: Nezar
family-names: Abdennur
orcid: "https://orcid.org/0000-0001-5814-0864"
- given-names: Anton
family-names: Goloborodko
orcid: "https://orcid.org/0000-0002-2210-8616"
- given-names: Maxim
family-names: Imakaev
orcid: "https://orcid.org/0000-0002-5320-2728"
- given-names: Peter
family-names: Kerpedjiev
- family-names: Fudenberg
given-names: Geoffrey
orcid: "https://orcid.org/0000-0001-5905-6517"
- family-names: Oullette
given-names: Scott
- given-names: Soohyun
family-names: Lee
orcid: "https://orcid.org/0000-0002-3594-6213"
- given-names: Hendrik
family-names: Strobelt
- given-names: Nils
family-names: Gehlenborg
orcid: "https://orcid.org/0000-0003-0327-8297"
- given-names: Leonid A.
family-names: Mirny
orcid: "https://orcid.org/0000-0002-0785-5410"
preferred-citation:
type: article
title: "Cooler: scalable storage for Hi-C data and other genomically labeled arrays"
authors:
- given-names: Nezar
family-names: Abdennur
orcid: "https://orcid.org/0000-0001-5814-0864"
- given-names: Leonid A.
family-names: Mirny
orcid: "https://orcid.org/0000-0002-0785-5410"
journal: Bioinformatics
month: 1
volume: 36
issue: 1
pages: 311-316
year: 2020
doi: "10.1093/bioinformatics/btz540"
cooler-0.9.3/CODE_OF_CONDUCT.md 0000664 0000000 0000000 00000003730 14477647226 0015633 0 ustar 00root root 0000000 0000000 # Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to act and interact in ways that
contribute to an open, welcoming and healthy community.
## Our Principles
### Assume good faith
Contributors have many ways of reaching our common goals which may differ from
your ways. Give others the chance to demonstrate that they are working towards
those shared goals and are not trying to offend you.
### Be respectful
In order to assume good faith, we must also act in good faith. Be empathetic and
kind toward other people, and be respectful of differing viewpoints, experiences,
and cultural backgrounds.
### Be collaborative
Give and gracefully accept constructive feedback, assistance, advice or
mentorship. Be welcoming towards anyone who wishes to contribute.
### Be open
Preferably use public methods of communication unless posting something
sensitive.
## Moderation
Community leaders are responsible for clarifying our principles of behavior and
will take appropriate and fair corrective action in response to any behavior
that they deem inappropriate, threatening, or harmful.
Community leaders have the right to remove, edit, or reject comments, commits,
code, wiki edits, issues, and other contributions that are not aligned to this
Code of Conduct, and will communicate reasons for moderation decisions when
appropriate.
## Scope
This Code of Conduct applies within all community spaces and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail
address, posting via an official social media account, or acting as an
appointed representative at an online or offline event.
## Attribution
This Code of Conduct is inspired by and partially adapted from the [Debian Code
of Conduct](https://www.debian.org/code_of_conduct), version 1.0 and the
[Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html),
version 2.1.
cooler-0.9.3/CONTRIBUTING.md 0000664 0000000 0000000 00000007706 14477647226 0015274 0 ustar 00root root 0000000 0000000 # Contributing
## General guidelines
If you haven't contributed to open-source before, we recommend you read [this excellent guide by GitHub on how to contribute to open source](https://opensource.guide/how-to-contribute). The guide is long, so you can gloss over things you're familiar with.
If you're not already familiar with it, we follow the [fork and pull model](https://help.github.com/articles/about-collaborative-development-models) on GitHub. Also, check out this recommended [git workflow](https://www.asmeurer.com/git-workflow/).
## Contributing Code
This project has a number of requirements for all code contributed.
* We follow the [PEP-8 style](https://www.python.org/dev/peps/pep-0008/) convention.
* We use [Numpy-style docstrings](https://numpydoc.readthedocs.io/en/latest/format.html).
* It's ideal if user-facing API changes or new features have documentation added.
## Setting up Your Development Environment
After forking and cloning the repository, install in "editable" (i.e. development) mode using the `-e` option:
```sh
git clone https://github.com/open2c/cooler.git
cd cooler
pip install -e .[all]
```
Editable mode installs the package by creating a "link" to the working (repo) directory.
## Running/Adding Unit Tests
It is best if all new functionality and/or bug fixes have unit tests added with each use-case.
We use [pytest](https://docs.pytest.org/en/latest) as our unit testing framework. Once you've configured your environment, you can just `cd` to the root of your repository and run
```sh
pytest
```
Unit tests are automatically run on Travis CI for pull requests.
## Adding/Building the Documentation
If a feature is stable and relatively finalized, it is time to add it to the documentation. If you are adding any private/public functions, it is best to add docstrings, to aid in reviewing code and also for the API reference.
We use [Numpy style docstrings](https://numpydoc.readthedocs.io/en/latest/format.html>) and [Sphinx](http://www.sphinx-doc.org/en/stable) to document this library. Sphinx, in turn, uses [reStructuredText](http://www.sphinx-doc.org/en/stable/rest.html) as its markup language for adding code.
We use the [Sphinx Autosummary extension](http://www.sphinx-doc.org/en/stable/ext/autosummary.html) to generate API references. You may want to look at `docs/api.rst` to see how these files look and where to add new functions, classes or modules.
To build the documentation:
```sh
make docs
```
After this, you can find an HTML version of the documentation in `docs/_build/html/index.html`.
Documentation from `master` and tagged releases is automatically built and hosted thanks to [readthedocs](https://readthedocs.org/).
## Acknowledgments
This document is based off of the [guidelines from the sparse project](https://github.com/pydata/sparse/blob/master/docs/contributing.rst).
cooler-0.9.3/LICENSE 0000664 0000000 0000000 00000002772 14477647226 0014046 0 ustar 00root root 0000000 0000000 BSD 3-Clause License
Copyright (c) 2015-2023, Cooler developers
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cooler-0.9.3/README.md 0000664 0000000 0000000 00000013223 14477647226 0014311 0 ustar 00root root 0000000 0000000 # Cooler
Latest Release
License
Build Status
Pre-commit Status
Docs Status
Coverage
Downloads
Citation
Community
## A cool place to store your Hi-C
Cooler is a support library for a **sparse, compressed, binary** persistent storage [format](http://cooler.readthedocs.io/en/latest/schema.html), also called cooler, used to store genomic interaction data, such as Hi-C contact matrices.
The cooler file format is an implementation of a genomic matrix data model using [HDF5](https://en.wikipedia.org/wiki/Hierarchical_Data_Format) as the container format. The `cooler` package includes a suite of [command line tools](http://cooler.readthedocs.io/en/latest/cli.html) and a [Python API](http://cooler.readthedocs.io/en/latest/api.html) to facilitate creating, querying and manipulating cooler files.
To get started:
- [Install](#Installation) cooler
- Read the [documentation](http://cooler.readthedocs.org/en/stable/) and see the Jupyter Notebook [walkthrough](https://github.com/open2c/cooler-binder).
- _cool_ files from published Hi-C data sets are available at `ftp://cooler.csail.mit.edu/coolers`.
- Many more multires (_mcool_) files are available on the [4DN data portal](https://data.4dnucleome.org/visualization/index).
### Installation
Install from PyPI using pip.
```sh
$ pip install cooler
```
If you are using `conda`, you can alternatively install `cooler` from the [bioconda](https://bioconda.github.io/index.html) channel.
```sh
$ conda install -c conda-forge -c bioconda cooler
```
### Citing
Abdennur, N., and Mirny, L.A. (2020). Cooler: scalable storage for Hi-C data and other genomically labeled arrays. _Bioinformatics_. doi: [10.1093/bioinformatics/btz540](https://doi.org/10.1093/bioinformatics/btz540).
```bibtex
@article{cooler2020,
author = {Abdennur, Nezar and Mirny, Leonid A},
title = "{Cooler: scalable storage for Hi-C data and other genomically labeled arrays}",
journal={Bioinformatics},
volume={36},
number={1},
pages={311--316},
year={2020},
doi = {10.1093/bioinformatics/btz540},
url = {https://doi.org/10.1093/bioinformatics/btz540},
}
```
### Contributing
Interested in contributing to cooler? That's great! To get started, check out the [contributing guide](https://github.com/open2c/cooler/blob/master/CONTRIBUTING.md).
### Related projects
- Process Hi-C data with [distiller](https://github.com/open2c/distiller)!
- Downstream analysis with [cooltools](https://github.com/open2c/cooltools)!
- Visualize your cooler data with [HiGlass](http://higlass.io)!
cooler-0.9.3/docs/ 0000775 0000000 0000000 00000000000 14477647226 0013761 5 ustar 00root root 0000000 0000000 cooler-0.9.3/docs/Makefile 0000664 0000000 0000000 00000016361 14477647226 0015430 0 ustar 00root root 0000000 0000000 # Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build
# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext
help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " applehelp to make an Apple Help Book"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " xml to make Docutils-native XML files"
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
@echo " coverage to run coverage check of the documentation (if enabled)"
clean:
rm -rf $(BUILDDIR)/*
html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."
json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."
htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/cooler.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/cooler.qhc"
applehelp:
$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
@echo
@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
@echo "N.B. You won't be able to view it unless you put it in" \
"~/Library/Documentation/Help or install it in your application" \
"bundle."
devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/cooler"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/cooler"
@echo "# devhelp"
epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
latexpdfja:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through platex and dvipdfmx..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."
man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."
info:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo "Running Texinfo files through makeinfo..."
make -C $(BUILDDIR)/texinfo info
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."
linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."
coverage:
$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
@echo "Testing of coverage in the sources finished, look at the " \
"results in $(BUILDDIR)/coverage/python.txt."
xml:
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
@echo
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
pseudoxml:
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
@echo
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
cooler-0.9.3/docs/api.rst 0000664 0000000 0000000 00000004122 14477647226 0015263 0 ustar 00root root 0000000 0000000 .. _api-reference:
API Reference
=============
.. toctree::
:maxdepth: 1
Quick reference
---------------
Cooler class
~~~~~~~~~~~~
.. autosummary::
cooler.Cooler
cooler.Cooler.binsize
cooler.Cooler.chromnames
cooler.Cooler.chromsizes
cooler.Cooler.bins
cooler.Cooler.pixels
cooler.Cooler.matrix
cooler.Cooler.open
cooler.Cooler.info
cooler.Cooler.offset
cooler.Cooler.extent
Creation/reduction
~~~~~~~~~~~~~~~~~~
.. autosummary::
cooler.create_cooler
cooler.merge_coolers
cooler.coarsen_cooler
cooler.zoomify_cooler
cooler.create_scool
Manipulation
~~~~~~~~~~~~
.. autosummary::
cooler.annotate
cooler.balance_cooler
cooler.rename_chroms
File operations
~~~~~~~~~~~~~~~
.. autosummary::
cooler.fileops.is_cooler
cooler.fileops.is_multires_file
cooler.fileops.list_coolers
cooler.fileops.cp
cooler.fileops.mv
cooler.fileops.ln
.. Sandbox
.. ~~~~~~~
.. .. autosummary::
.. cooler.sandbox.dask.read_table
----
cooler
------
.. autoclass:: cooler.Cooler
:members:
.. autofunction:: cooler.annotate
.. autofunction:: cooler.create_cooler
.. autofunction:: cooler.merge_coolers
.. autofunction:: cooler.coarsen_cooler
.. autofunction:: cooler.zoomify_cooler
.. autofunction:: cooler.balance_cooler
.. autofunction:: cooler.rename_chroms
.. autofunction:: cooler.create_scool
----
cooler.create
-------------
.. autofunction:: cooler.create.sanitize_pixels
.. autofunction:: cooler.create.sanitize_records
cooler.fileops
--------------
.. autofunction:: cooler.fileops.is_cooler
.. autofunction:: cooler.fileops.is_multires_file
.. autofunction:: cooler.fileops.list_coolers
.. autofunction:: cooler.fileops.cp
.. autofunction:: cooler.fileops.mv
.. autofunction:: cooler.fileops.ln
cooler.util
-----------
.. autofunction:: cooler.util.partition
.. autofunction:: cooler.util.fetch_chromsizes
.. autofunction:: cooler.util.read_chromsizes
.. autofunction:: cooler.util.binnify
.. autofunction:: cooler.util.digest
.. cooler.sandbox
.. --------------
.. .. autofunction:: cooler.sandbox.dask.read_table
cooler-0.9.3/docs/cli.rst 0000664 0000000 0000000 00000102755 14477647226 0015274 0 ustar 00root root 0000000 0000000 .. _cli-reference:
CLI Reference
=============
.. toctree::
:maxdepth: 1
Quick reference
---------------
.. program:: cooler
.. code-block:: shell
cooler [OPTIONS] COMMAND [ARGS]...
.. list-table::
:widths: 25 100
:align: left
:header-rows: 1
* - Data ingest
-
* - `cooler cload`_
- Create a cooler from genomic point pairs and bins.
* - `cooler load`_
- Create a cooler from a pre-binned matrix.
.. list-table::
:widths: 25 100
:align: left
:header-rows: 1
* - Reduction
-
* - `cooler merge`_
- Merge multiple coolers with identical axes.
* - `cooler coarsen`_
- Coarsen a cooler to a lower resolution.
* - `cooler zoomify`_
- Generate a multi-resolution cooler file by coarsening.
.. list-table::
:widths: 25 100
:align: left
:header-rows: 1
* - Normalization
-
* - `cooler balance`_
- Out-of-core matrix balancing.
.. list-table::
:widths: 25 100
:align: left
:header-rows: 1
* - Export/visualization
-
* - `cooler info`_
- Display a cooler’s info and metadata.
* - `cooler dump`_
- Dump a cooler’s data to a text stream.
* - `cooler show`_
- Display and browse a cooler with matplotlib.
.. list-table::
:widths: 25 100
:align: left
:header-rows: 1
* - File manipulation/info
-
* - `cooler tree`_
- Display a file’s data hierarchy.
* - `cooler attrs`_
- Display a file’s attribute hierarchy.
* - `cooler ls`_
- List all coolers inside a file.
* - `cooler cp`_
- Copy a cooler from one file to another or within the same file.
* - `cooler mv`_
- Rename a cooler within the same file.
* - `cooler ln`_
- Create a hard, soft or external link to a cooler.
.. list-table::
:widths: 25 100
:align: left
:header-rows: 1
* - Helper commands
-
* - `cooler makebins`_
- Generate fixed-width genomic bins.
* - `cooler digest`_
- Generate fragment-delimited genomic bins.
* - `cooler csort`_
- Sort and index a contact list.
.. rubric:: Options
.. option:: -v, --verbose
Verbose logging.
.. option:: -d, --debug
On error, drop into the post-mortem debugger shell.
.. option:: -V, --version
Show the version and exit.
See the cooler_cli.ipynb Jupyter Notebook for specific examples on usage: (https://github.com/open2c/cooler-binder).
----
cooler cload
------------
Create a cooler from genomic pairs and bins.
Choose a subcommand based on the format of the input contact list.
.. program:: cooler cload
.. code-block:: shell
cooler cload [OPTIONS] COMMAND [ARGS]...
.. rubric:: Commands
.. hlist::
:columns: 4
* .. object:: hiclib
* .. object:: pairix
* .. object:: pairs
* .. object:: tabix
----
cooler cload pairs
------------------
Bin any text file or stream of pairs.
Pairs data need not be sorted. Accepts compressed files.
To pipe input from stdin, set PAIRS_PATH to '-'.
BINS : One of the following
: 1. Path to a chromsizes file, 2. Bin size in bp
: Path to BED file defining the genomic bin segmentation.
PAIRS_PATH : Path to contacts (i.e. read pairs) file.
COOL_PATH : Output COOL file path or URI.
.. program:: cooler cload pairs
.. code-block:: shell
cooler cload pairs [OPTIONS] BINS PAIRS_PATH COOL_PATH
.. rubric:: Arguments
.. option:: BINS
Required argument
.. option:: PAIRS_PATH
Required argument
.. option:: COOL_PATH
Required argument
.. rubric:: Options
.. option:: --metadata
Path to JSON file containing user metadata.
.. option:: --assembly
Name of genome assembly (e.g. hg19, mm10)
.. option:: -c1, --chrom1
chrom1 field number (one-based) [required]
.. option:: -p1, --pos1
pos1 field number (one-based) [required]
.. option:: -c2, --chrom2
chrom2 field number (one-based) [required]
.. option:: -p2, --pos2
pos2 field number (one-based) [required]
.. option:: --chunksize
Number of input lines to load at a time
.. option:: -0, --zero-based
Positions are zero-based [default: False]
.. option:: --comment-char
Comment character that indicates lines to ignore. [default: #]
.. option:: -N, --no-symmetric-upper
Create a complete square matrix without implicit symmetry. This allows for distinct upper- and lower-triangle values
.. option:: --input-copy-status
Copy status of input data when using symmetric-upper storage. | `unique`: Incoming data comes from a unique half of a symmetric map, regardless of how the coordinates of a pair are ordered. `duplex`: Incoming data contains upper- and lower-triangle duplicates. All input records that map to the lower triangle will be discarded! | If you wish to treat lower- and upper-triangle input data as distinct, use the ``--no-symmetric-upper`` option. [default: unique]
.. option:: --field
Specify quantitative input fields to aggregate into value columns using the syntax ``--field =``. Optionally, append ``:`` followed by ``dtype=`` to specify the data type (e.g. float), and/or ``agg=`` to specify an aggregation function different from sum (e.g. mean). Field numbers are 1-based. Passing 'count' as the target name will override the default behavior of storing pair counts. Repeat the ``--field`` option for each additional field.
.. option:: --temp-dir
Create temporary files in a specified directory. Pass ``-`` to use the platform default temp dir.
.. option:: --no-delete-temp
Do not delete temporary files when finished.
.. option:: --max-merge
Maximum number of chunks to merge before invoking recursive merging [default: 200]
.. option:: --storage-options
Options to modify the data filter pipeline. Provide as a comma-separated list of key-value pairs of the form 'k1=v1,k2=v2,...'. See http://docs.h5py.org/en/stable/high/dataset.html#filter-pipeline for more details.
.. option:: -a, --append
Pass this flag to append the output cooler to an existing file instead of overwriting the file.
----
cooler cload pairix
-------------------
Bin a pairix-indexed contact list file.
BINS : One of the following
: 1. Path to a chromsizes file, 2. Bin size in bp
: Path to BED file defining the genomic bin segmentation.
PAIRS_PATH : Path to contacts (i.e. read pairs) file.
COOL_PATH : Output COOL file path or URI.
See also: 'cooler csort' to sort and index a contact list file
Pairix on GitHub: .
.. program:: cooler cload pairix
.. code-block:: shell
cooler cload pairix [OPTIONS] BINS PAIRS_PATH COOL_PATH
.. rubric:: Arguments
.. option:: BINS
Required argument
.. option:: PAIRS_PATH
Required argument
.. option:: COOL_PATH
Required argument
.. rubric:: Options
.. option:: --metadata
Path to JSON file containing user metadata.
.. option:: --assembly
Name of genome assembly (e.g. hg19, mm10)
.. option:: -p, --nproc
Number of processes to split the work between. [default: 8]
.. option:: -0, --zero-based
Positions are zero-based [default: False]
.. option:: -s, --max-split
Divide the pairs from each chromosome into at most this many chunks. Smaller chromosomes will be split less frequently or not at all. Increase this value if large chromosomes dominate the workload on multiple processors. [default: 2]
----
cooler cload tabix
------------------
Bin a tabix-indexed contact list file.
BINS : One of the following
: 1. Path to a chromsizes file, 2. Bin size in bp
: Path to BED file defining the genomic bin segmentation.
PAIRS_PATH : Path to contacts (i.e. read pairs) file.
COOL_PATH : Output COOL file path or URI.
See also: 'cooler csort' to sort and index a contact list file
Tabix manpage: .
.. program:: cooler cload tabix
.. code-block:: shell
cooler cload tabix [OPTIONS] BINS PAIRS_PATH COOL_PATH
.. rubric:: Arguments
.. option:: BINS
Required argument
.. option:: PAIRS_PATH
Required argument
.. option:: COOL_PATH
Required argument
.. rubric:: Options
.. option:: --metadata
Path to JSON file containing user metadata.
.. option:: --assembly
Name of genome assembly (e.g. hg19, mm10)
.. option:: -p, --nproc
Number of processes to split the work between. [default: 8]
.. option:: -c2, --chrom2
chrom2 field number (one-based)
.. option:: -p2, --pos2
pos2 field number (one-based)
.. option:: -0, --zero-based
Positions are zero-based [default: False]
.. option:: -s, --max-split
Divide the pairs from each chromosome into at most this many chunks. Smaller chromosomes will be split less frequently or not at all. Increase this value if large chromosomes dominate the workload on multiple processors. [default: 2]
----
cooler cload hiclib
-------------------
Bin a hiclib HDF5 contact list (frag) file.
BINS : One of the following
: 1. Path to a chromsizes file, 2. Bin size in bp
: Path to BED file defining the genomic bin segmentation.
PAIRS_PATH : Path to contacts (i.e. read pairs) file.
COOL_PATH : Output COOL file path or URI.
hiclib on BitBucket: .
.. program:: cooler cload hiclib
.. code-block:: shell
cooler cload hiclib [OPTIONS] BINS PAIRS_PATH COOL_PATH
.. rubric:: Arguments
.. option:: BINS
Required argument
.. option:: PAIRS_PATH
Required argument
.. option:: COOL_PATH
Required argument
.. rubric:: Options
.. option:: --metadata
Path to JSON file containing user metadata.
.. option:: --assembly
Name of genome assembly (e.g. hg19, mm10)
.. option:: -c, --chunksize
Control the number of pixels handled by each worker process at a time. [default: 100000000]
----
cooler load
-----------
Create a cooler from a pre-binned matrix.
BINS_PATH : One of the following
: 1. Path to a chromsizes file, 2. Bin size in bp
: Path to BED file defining the genomic bin segmentation.
PIXELS_PATH : Text file containing nonzero pixel values. May be gzipped.
Pass '-' to use stdin.
COOL_PATH : Output COOL file path or URI.
**Notes**
Two input format options (tab-delimited).
Input pixel file may be compressed.
COO: COO-rdinate sparse matrix format (a.k.a. ijv triple).
3 columns: "bin1_id, bin2_id, count",
BG2: 2D version of the bedGraph format.
7 columns: "chrom1, start1, end1, chrom2, start2, end2, count"
**Examples**
cooler load -f bg2 : in.bg2.gz out.cool
.. program:: cooler load
.. code-block:: shell
cooler load [OPTIONS] BINS_PATH PIXELS_PATH COOL_PATH
.. rubric:: Arguments
.. option:: BINS_PATH
Required argument
.. option:: PIXELS_PATH
Required argument
.. option:: COOL_PATH
Required argument
.. rubric:: Options
.. option:: -f, --format
'coo' refers to a tab-delimited sparse triplet file (bin1, bin2, count). 'bg2' refers to a 2D bedGraph-like file (chrom1, start1, end1, chrom2, start2, end2, count). [required]
.. option:: --metadata
Path to JSON file containing user metadata.
.. option:: --assembly
Name of genome assembly (e.g. hg19, mm10)
.. option:: --field
Add supplemental value fields or override default field numbers for the specified format. Specify quantitative input fields to aggregate into value columns using the syntax ``--field =``. Optionally, append ``:`` followed by ``dtype=`` to specify the data type (e.g. float). Field numbers are 1-based. Repeat the ``--field`` option for each additional field.
.. option:: -c, --chunksize
Size (in number of lines/records) of data chunks to read and process from the input file at a time. These chunks will be saved as temporary partial Coolers and merged at the end. Also specifies the size of the buffer during the merge step.
.. option:: --count-as-float
Store the 'count' column as floating point values instead of as integers. Can also be specified using the `--field` option.
.. option:: --one-based
Pass this flag if the bin IDs listed in a COO file are one-based instead of zero-based.
.. option:: --comment-char
Comment character that indicates lines to ignore. [default: #]
.. option:: -N, --no-symmetric-upper
Create a complete square matrix without implicit symmetry. This allows for distinct upper- and lower-triangle values
.. option:: --input-copy-status
Copy status of input data when using symmetric-upper storage. | `unique`: Incoming data comes from a unique half of a symmetric matrix, regardless of how element coordinates are ordered. Execution will be aborted if duplicates are detected. `duplex`: Incoming data contains upper- and lower-triangle duplicates. All lower-triangle input elements will be discarded! | If you wish to treat lower- and upper-triangle input data as distinct, use the ``--no-symmetric-upper`` option instead. [default: unique]
.. option:: --temp-dir
Create temporary files in a specified directory. Pass ``-`` to use the platform default temp dir.
.. option:: --no-delete-temp
Do not delete temporary files when finished.
.. option:: --storage-options
Options to modify the data filter pipeline. Provide as a comma-separated list of key-value pairs of the form 'k1=v1,k2=v2,...'. See http://docs.h5py.org/en/stable/high/dataset.html#filter-pipeline for more details.
.. option:: -a, --append
Pass this flag to append the output cooler to an existing file instead of overwriting the file.
----
cooler merge
------------
Merge multiple coolers with identical axes.
OUT_PATH : Output file path or URI.
IN_PATHS : Input file paths or URIs of coolers to merge.
**Notes**
Data columns merged:
pixels/bin1_id, pixels/bin2_id, pixels/
Data columns preserved:
chroms/name, chroms/length
bins/chrom, bins/start, bins/end
Additional columns in the input files are not transferred to the output.
.. program:: cooler merge
.. code-block:: shell
cooler merge [OPTIONS] OUT_PATH [IN_PATHS]...
.. rubric:: Arguments
.. option:: OUT_PATH
Required argument
.. option:: IN_PATHS
Optional argument(s)
.. rubric:: Options
.. option:: -c, --chunksize
Size of the merge buffer in number of pixel table rows. [default: 20000000]
.. option:: --field
Specify the names of value columns to merge as ''. Repeat the `--field` option for each one. Use ',dtype=' to specify the dtype. Include ',agg=' to specify an aggregation function different from 'sum'.
.. option:: -a, --append
Pass this flag to append the output cooler to an existing file instead of overwriting the file.
----
cooler coarsen
--------------
Coarsen a cooler to a lower resolution.
Works by pooling *k*-by-*k* neighborhoods of pixels and aggregating.
Each chromosomal block is coarsened individually.
COOL_PATH : Path to a COOL file or Cooler URI.
.. program:: cooler coarsen
.. code-block:: shell
cooler coarsen [OPTIONS] COOL_PATH
.. rubric:: Arguments
.. option:: COOL_PATH
Required argument
.. rubric:: Options
.. option:: -k, --factor
Gridding factor. The contact matrix is coarsegrained by grouping each chromosomal contact block into k-by-k element tiles [default: 2]
.. option:: -n, -p, --nproc
Number of processes to use for batch processing chunks of pixels [default: 1, i.e. no process pool]
.. option:: -c, --chunksize
Number of pixels allocated to each process [default: 10000000]
.. option:: --field
Specify the names of value columns to merge as ''. Repeat the `--field` option for each one. Use ',dtype=' to specify the dtype. Include ',agg=' to specify an aggregation function different from 'sum'.
.. option:: -o, --out
Output file or URI [required]
.. option:: -a, --append
Pass this flag to append the output cooler to an existing file instead of overwriting the file.
----
cooler zoomify
--------------
Generate a multi-resolution cooler file by coarsening.
COOL_PATH : Path to a COOL file or Cooler URI.
.. program:: cooler zoomify
.. code-block:: shell
cooler zoomify [OPTIONS] COOL_PATH
.. rubric:: Arguments
.. option:: COOL_PATH
Required argument
.. rubric:: Options
.. option:: -n, -p, --nproc
Number of processes to use for batch processing chunks of pixels [default: 1, i.e. no process pool]
.. option:: -c, --chunksize
Number of pixels allocated to each process [default: 10000000]
.. option:: -r, --resolutions
Comma-separated list of target resolutions. Use suffixes B or N to specify a progression: B for binary (geometric steps of factor 2), N for nice (geometric steps of factor 10 interleaved with steps of 2 and 5). Examples: 1000B=1000,2000,4000,8000,... 1000N=1000,2000,5000,10000,... 5000N=5000,10000,25000,50000,... 4DN is an alias for 1000,2000,5000N [default: B]
.. option:: --balance
Apply balancing to each zoom level. Off by default.
.. option:: --balance-args
Additional arguments to pass to cooler balance. To deal with space ambiguity, use quotes to pass multiple arguments, e.g. --balance-args '--nproc 8 --ignore-diags 3' Note that nproc for balancing must be specified independently of zoomify arguments.
.. option:: -i, --base-uri
One or more additional base coolers to aggregate from, if needed.
.. option:: -o, --out
Output file or URI
.. option:: --field
Specify the names of value columns to merge as ''. Repeat the `--field` option for each one. Use ':dtype=' to specify the dtype. Include ',agg=' to specify an aggregation function different from 'sum'.
.. option:: --legacy
Use the legacy layout of integer-labeled zoom levels.
----
cooler balance
--------------
Out-of-core matrix balancing.
Matrix must be symmetric. See the help for various filtering options to
mask out poorly mapped bins.
COOL_PATH : Path to a COOL file.
.. program:: cooler balance
.. code-block:: shell
cooler balance [OPTIONS] COOL_PATH
.. rubric:: Arguments
.. option:: COOL_PATH
Required argument
.. rubric:: Options
.. option:: --cis-only
Calculate weights against intra-chromosomal data only instead of genome-wide.
.. option:: --trans-only
Calculate weights against inter-chromosomal data only instead of genome-wide.
.. option:: --ignore-diags
Number of diagonals of the contact matrix to ignore, including the main diagonal. Examples: 0 ignores nothing, 1 ignores the main diagonal, 2 ignores diagonals (-1, 0, 1), etc. [default: 2]
.. option:: --ignore-dist
Distance from the diagonal in bp to ignore. The maximum of the corresponding number of diagonals and `--ignore-diags` will be used.
.. option:: --mad-max
Ignore bins from the contact matrix using the 'MAD-max' filter: bins whose log marginal sum is less than ``mad-max`` median absolute deviations below the median log marginal sum of all the bins in the same chromosome. [default: 5]
.. option:: --min-nnz
Ignore bins from the contact matrix whose marginal number of nonzeros is less than this number. [default: 10]
.. option:: --min-count
Ignore bins from the contact matrix whose marginal count is less than this number. [default: 0]
.. option:: --blacklist
Path to a 3-column BED file containing genomic regions to mask out during the balancing procedure, e.g. sequence gaps or regions of poor mappability.
.. option:: -p, --nproc
Number of processes to split the work between. [default: 8]
.. option:: -c, --chunksize
Control the number of pixels handled by each worker process at a time. [default: 10000000]
.. option:: --tol
Threshold value of variance of the marginals for the algorithm to converge. [default: 1e-05]
.. option:: --max-iters
Maximum number of iterations to perform if convergence is not achieved. [default: 200]
.. option:: --name
Name of column to write to. [default: weight]
.. option:: -f, --force
Overwrite the target dataset, 'weight', if it already exists.
.. option:: --check
Check whether a data column 'weight' already exists.
.. option:: --stdout
Print weight column to stdout instead of saving to file.
.. option:: --convergence-policy
What to do with weights when balancing doesn't converge in max_iters. 'store_final': Store the final result, regardless of whether the iterations converge to the specified tolerance; 'store_nan': Store a vector of NaN values to indicate that the matrix failed to converge; 'discard': Store nothing and exit gracefully; 'error': Abort with non-zero exit status. [default: store_final]
----
cooler info
-----------
Display a cooler's info and metadata.
COOL_PATH : Path to a COOL file or cooler URI.
.. program:: cooler info
.. code-block:: shell
cooler info [OPTIONS] COOL_PATH
.. rubric:: Arguments
.. option:: COOL_PATH
Required argument
.. rubric:: Options
.. option:: -f, --field
Print the value of a specific info field.
.. option:: -m, --metadata
Print the user metadata in JSON format.
.. option:: -o, --out
Output file (defaults to stdout)
----
cooler dump
-----------
Dump a cooler's data to a text stream.
COOL_PATH : Path to COOL file or cooler URI.
.. program:: cooler dump
.. code-block:: shell
cooler dump [OPTIONS] COOL_PATH
.. rubric:: Arguments
.. option:: COOL_PATH
Required argument
.. rubric:: Options
.. option:: -t, --table
Which table to dump. Choosing 'chroms' or 'bins' will cause all pixel-related options to be ignored. Note that for coolers stored in symmetric-upper mode, 'pixels' only holds the upper triangle values of the matrix. [default: pixels]
.. option:: -c, --columns
Restrict output to a subset of columns, provided as a comma-separated list.
.. option:: -H, --header
Print the header of column names as the first row. [default: False]
.. option:: --na-rep
Missing data representation. Default is empty ''.
.. option:: --float-format
Format string for floating point numbers (e.g. '.12g', '03.2f'). [default: g]
.. option:: -r, --range
The coordinates of a genomic region shown along the row dimension, in UCSC-style notation. (Example: chr1:10,000,000-11,000,000). If omitted, the entire contact matrix is printed.
.. option:: -r2, --range2
The coordinates of a genomic region shown along the column dimension. If omitted, the column range is the same as the row range.
.. option:: -f, --fill-lower
For coolers using 'symmetric-upper' storage, populate implicit areas of the genomic query box by generating lower triangle pixels. If not specified, only upper triangle pixels are reported. This option has no effect on coolers stored in 'square' mode. [default: False]
.. option:: -b, --balanced, --no-balance
Apply balancing weights to data. This will print an extra column called `balanced` [default: False]
.. option:: --join
Print the full chromosome bin coordinates instead of bin IDs. This will replace the `bin1_id` column with `chrom1`, `start1`, and `end1`, and the `bin2_id` column with `chrom2`, `start2` and `end2`. [default: False]
.. option:: --annotate
Join additional columns from the bin table against the pixels. Provide a comma separated list of column names (no spaces). The merged columns will be suffixed by '1' and '2' accordingly.
.. option:: --one-based-ids
Print bin IDs as one-based rather than zero-based.
.. option:: --one-based-starts
Print start coordinates as one-based rather than zero-based.
.. option:: -k, --chunksize
Sets the number of pixel records loaded from disk at one time. Can affect the performance of joins on high resolution datasets. [default: 1000000]
.. option:: -o, --out
Output text file If .gz extension is detected, file is written using zlib. Default behavior is to stream to stdout.
----
cooler show
-----------
Display and browse a cooler in matplotlib.
COOL_PATH : Path to a COOL file or Cooler URI.
RANGE : The coordinates of the genomic region to display, in UCSC notation.
Example: chr1:10,000,000-11,000,000
.. program:: cooler show
.. code-block:: shell
cooler show [OPTIONS] COOL_PATH RANGE
.. rubric:: Arguments
.. option:: COOL_PATH
Required argument
.. option:: RANGE
Required argument
.. rubric:: Options
.. option:: -r2, --range2
The coordinates of a genomic region shown along the column dimension. If omitted, the column range is the same as the row range. Use to display asymmetric matrices or trans interactions.
.. option:: -b, --balanced
Show the balanced contact matrix. If not provided, display the unbalanced counts.
.. option:: -o, --out
Save the image of the contact matrix to a file. If not specified, the matrix is displayed in an interactive window. The figure format is deduced from the extension of the file, the supported formats are png, jpg, svg, pdf, ps and eps.
.. option:: --dpi
The DPI of the figure, if saving to a file
.. option:: -s, --scale
Scale transformation of the colormap: linear, log2 or log10. Default is log10.
.. option:: -f, --force
Force display very large matrices (>=10^8 pixels). Use at your own risk as it may cause performance issues.
.. option:: --zmin
The minimal value of the color scale. Units must match those of the colormap scale. To provide a negative value use an equal sign and quotes, e.g. -zmin='-0.5'
.. option:: --zmax
The maximal value of the color scale. Units must match those of the colormap scale. To provide a negative value use an equal sign and quotes, e.g. -zmax='-0.5'
.. option:: --cmap
The colormap used to display the contact matrix. See the full list at http://matplotlib.org/examples/color/colormaps_reference.html
.. option:: --field
Pixel values to display. [default: count]
----
cooler tree
-----------
Display a file's data hierarchy.
.. program:: cooler tree
.. code-block:: shell
cooler tree [OPTIONS] URI
.. rubric:: Arguments
.. option:: URI
Required argument
.. rubric:: Options
.. option:: -L, --level
----
cooler attrs
------------
Display a file's attribute hierarchy.
.. program:: cooler attrs
.. code-block:: shell
cooler attrs [OPTIONS] URI
.. rubric:: Arguments
.. option:: URI
Required argument
.. rubric:: Options
.. option:: -L, --level
----
cooler ls
---------
List all coolers inside a file.
.. program:: cooler ls
.. code-block:: shell
cooler ls [OPTIONS] COOL_PATH
.. rubric:: Arguments
.. option:: COOL_PATH
Required argument
.. rubric:: Options
.. option:: -l, --long
Long listing format
----
cooler cp
---------
Copy a cooler from one file to another or within the same file.
See also: h5copy, h5repack tools from HDF5 suite.
.. program:: cooler cp
.. code-block:: shell
cooler cp [OPTIONS] SRC_URI DST_URI
.. rubric:: Arguments
.. option:: SRC_URI
Required argument
.. option:: DST_URI
Required argument
.. rubric:: Options
.. option:: -w, --overwrite
Truncate and replace destination file if it already exists.
----
cooler mv
---------
Rename a cooler within the same file.
.. program:: cooler mv
.. code-block:: shell
cooler mv [OPTIONS] SRC_URI DST_URI
.. rubric:: Arguments
.. option:: SRC_URI
Required argument
.. option:: DST_URI
Required argument
.. rubric:: Options
.. option:: -w, --overwrite
Truncate and replace destination file if it already exists.
----
cooler ln
---------
Create a hard link to a cooler (rather than a true copy) in the same file.
Also supports soft links (in the same file) or external links (different
files).
.. program:: cooler ln
.. code-block:: shell
cooler ln [OPTIONS] SRC_URI DST_URI
.. rubric:: Arguments
.. option:: SRC_URI
Required argument
.. option:: DST_URI
Required argument
.. rubric:: Options
.. option:: -w, --overwrite
Truncate and replace destination file if it already exists.
.. option:: -s, --soft
Creates a soft link rather than a hard link if the source and destination file are the same. Otherwise, creates an external link. This type of link uses a path rather than a pointer.
----
cooler makebins
---------------
Generate fixed-width genomic bins.
Output a genome segmentation at a fixed resolution as a BED file.
CHROMSIZES_PATH : UCSC-like chromsizes file, with chromosomes in desired
order.
BINSIZE : Resolution (bin size) in base pairs.
.. program:: cooler makebins
.. code-block:: shell
cooler makebins [OPTIONS] CHROMSIZES_PATH BINSIZE
.. rubric:: Arguments
.. option:: CHROMSIZES_PATH
Required argument
.. option:: BINSIZE
Required argument
.. rubric:: Options
.. option:: -o, --out
Output file (defaults to stdout)
.. option:: -H, --header
Print the header of column names as the first row. [default: False]
.. option:: -i, --rel-ids
Include a column of relative bin IDs for each chromosome. Choose whether to report them as 0- or 1-based.
----
cooler digest
-------------
Generate fragment-delimited genomic bins.
Output a genome segmentation of restriction fragments as a BED file.
CHROMSIZES_PATH : UCSC-like chromsizes file, with chromosomes in desired
order.
FASTA_PATH : Genome assembly FASTA file or folder containing FASTA files
(uncompressed).
ENZYME : Name of restriction enzyme
.. program:: cooler digest
.. code-block:: shell
cooler digest [OPTIONS] CHROMSIZES_PATH FASTA_PATH ENZYME
.. rubric:: Arguments
.. option:: CHROMSIZES_PATH
Required argument
.. option:: FASTA_PATH
Required argument
.. option:: ENZYME
Required argument
.. rubric:: Options
.. option:: -o, --out
Output file (defaults to stdout)
.. option:: -H, --header
Print the header of column names as the first row. [default: False]
.. option:: -i, --rel-ids
Include a column of relative bin IDs for each chromosome. Choose whether to report them as 0- or 1-based.
----
cooler csort
------------
Sort and index a contact list.
Order the mates of each pair record so that all contacts are upper
triangular with respect to the chromosome ordering given by the chromosomes
file, sort contacts by genomic location, and index the resulting file.
PAIRS_PATH : Contacts (i.e. read pairs) text file, optionally compressed.
CHROMOSOMES_PATH : File listing desired chromosomes in the desired order.
May be tab-delimited, e.g. a UCSC-style chromsizes file. Contacts mapping to
other chromosomes will be discarded.
**Notes**
| - csort can also be used to sort and index a text representation of
| a contact *matrix* in bedGraph-like format. In this case, substitute
| `pos1` and `pos2` with `start1` and `start2`, respectively.
| - Requires Unix tools: sort, bgzip + tabix or pairix.
If indexing with Tabix, the output file will have the following properties:
| - Upper triangular: the read pairs on each row are assigned to side 1 or 2
| in such a way that (chrom1, pos1) is always "less than" (chrom2, pos2)
| - Rows are lexicographically sorted by chrom1, pos1, chrom2, pos2;
| i.e. "positionally sorted"
| - Compressed with bgzip [*]
| - Indexed using Tabix [*] on chrom1 and pos1.
If indexing with Pairix, the output file will have the following properties:
| - Upper triangular: the read pairs on each row are assigned to side 1 or 2
| in such a way that (chrom1, pos1) is always "less than" (chrom2, pos2)
| - Rows are lexicographically sorted by chrom1, chrom2, pos1, pos2; i.e.
| "block sorted"
| - Compressed with bgzip [*]
| - Indexed using Pairix [+] on chrom1, chrom2 and pos1.
| [*] Tabix manpage: https://www.htslib.org/doc/tabix.html
| [+] Pairix on Github: https://github.com/4dn-dcic/pairix
.. program:: cooler csort
.. code-block:: shell
cooler csort [OPTIONS] PAIRS_PATH CHROMOSOMES_PATH
.. rubric:: Arguments
.. option:: PAIRS_PATH
Required argument
.. option:: CHROMOSOMES_PATH
Required argument
.. rubric:: Options
.. option:: -c1, --chrom1
chrom1 field number in the input file (starting from 1) [required]
.. option:: -c2, --chrom2
chrom2 field number [required]
.. option:: -p1, --pos1
pos1 field number [required]
.. option:: -p2, --pos2
pos2 field number [required]
.. option:: -i, --index
Select the preset sort and indexing options [default: pairix]
.. option:: --flip-only
Only flip mates; no sorting or indexing. Write to stdout. [default: False]
.. option:: -p, --nproc
Number of processors [default: 8]
.. option:: -0, --zero-based
Read positions are zero-based [default: False]
.. option:: --sep
Data delimiter in the input file [default: \t]
.. option:: --comment-char
Comment character to skip header [default: #]
.. option:: --sort-options
Quoted list of additional options to `sort` command
.. option:: -o, --out
Output gzip file
.. option:: -s1, --strand1
strand1 field number (deprecated)
.. option:: -s2, --strand2
strand2 field number (deprecated)
----
cooler-0.9.3/docs/concepts.rst 0000664 0000000 0000000 00000023601 14477647226 0016333 0 ustar 00root root 0000000 0000000 Concepts
========
Resource String
---------------
The default location for a single-cooler .cool file is the root group ``/`` of the HDF5 file. It does not need to be explicitly specified.
.. code-block:: python
>>> import cooler
>>> c = cooler.Cooler('data/WT.DpnII.10kb.cool')
>>> c = cooler.Cooler('data/WT.DpnII.10kb.cool::/') # same as above
However, coolers can be stored at any level of the HDF5 hierarchy and qualified using a URI string of the form ``/path/to/cool/file::/path/to/cooler/group``.
.. code-block:: python
>>> c1 = cooler.Cooler('data/WT.DpnII.mcool::resolutions/10000')
>>> c2 = cooler.Cooler('data/WT.DpnII.mcool::resolutions/1000')
The current standard for Hi-C coolers is to name multi-resolution coolers under ``.mcool`` extension,
and store different resolutions in an HDF5 group ``resolutions``, as shown above.
Data selection
--------------
Several :class:`cooler.Cooler` methods return data selectors. Those include selecting tables and matrices (see below). Data selectors don't retrieve any data from disk until queried. There are several ways to query using selectors. Genomic range strings may be provided as 3-tuples ``(chrom: str, start: int, end: int)`` or in UCSC-style strings of the form ``{chrom}:{start}-{end}``. Unit prefixes ``k, M, G`` are supported in range strings. For regions with start and end that are not multiples of the resolution, selectors return the smallest range of bins that fully contains the half-open interval [start, end).
Table selectors (chroms, bins, pixels)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There are data selectors for the three tables: :meth:`cooler.Cooler.chroms`, :meth:`cooler.Cooler.bins`, :meth:`cooler.Cooler.pixels`.
They support the following:
- lazily select columns or lists of columns, returning new selectors
- query table rows using integer/slice indexing syntax
.. code-block:: python
>>> c.bins()
>>> c.bins()[:10]
chrom start end weight
0 chr1 0 1000000 NaN
1 chr1 1000000 2000000 1.243141
2 chr1 2000000 3000000 1.313995
3 chr1 3000000 4000000 1.291705
4 chr1 4000000 5000000 1.413288
5 chr1 5000000 6000000 1.165382
6 chr1 6000000 7000000 0.811824
7 chr1 7000000 8000000 1.056107
8 chr1 8000000 9000000 1.058915
9 chr1 9000000 10000000 1.035910
>>> c.pixels()[:10]
bin1_id bin2_id count
0 0 0 18578
1 0 1 11582
2 0 2 446
3 0 3 196
4 0 4 83
5 0 5 112
6 0 6 341
7 0 7 255
8 0 8 387
9 0 9 354
>>> c.bins()['weight']
>>> weights = c.bins()['weight'].fetch('chr3')
>>> weights.head()
494 1.144698
495 1.549848
496 1.212580
497 1.097539
498 0.871931
Name: weight, dtype: float64
>>> mybins1 = c.bins().fetch('chr3:10,000,000-20,000,000')
>>> mybins2 = c.bins().fetch( ('chr3', 10000000, 20000000) )
>>> mybins2.head()
chrom start end weight
504 chr3 10000000 11000000 0.783160
505 chr3 11000000 12000000 0.783806
506 chr3 12000000 13000000 0.791204
507 chr3 13000000 14000000 0.821171
508 chr3 14000000 15000000 0.813079
Matrix selector
~~~~~~~~~~~~~~~
The :meth:`cooler.Cooler.matrix` selector supports two types of queries:
- 2D bin range queries using slice indexing syntax
- 2D genomic range queries using the ``fetch`` method
The matrix selector’s fetch method is intended to represent a **2D range query** (rectangular window), similar to the slice semantics of a 2D array. Given a matrix selector ``sel``, when calling ``sel.fetch(region1, region2)`` the ``region1`` and ``region2`` are single contiguous genomic ranges along the first and second axes of the contact matrix. This mirrors the global slice indexing interface of the matrix selector ``sel[a:b, c:d]``, where the only difference is that the genomic range syntax cannot cross chromosome boundaries. If ``region2`` is not provided, it is taken to be the same as ``region1``. That means that ``sel.fetch('chr2:10M-20M')`` returns the same result as ``sel.fetch('chr2:10M-20M', 'chr2:10M-20M')``. As a single rectangular window, queries like ``sel.fetch('chr2', 'chr3')`` will return *inter*-chromosomal values and not intra-chromosomal ones.
.. code-block:: python
>>> c.matrix(balance=False)[1000:1005, 1000:1005]
array([[120022, 34107, 17335, 14053, 4137],
[ 34107, 73396, 47427, 16125, 3642],
[ 17335, 47427, 80458, 25105, 5394],
[ 14053, 16125, 25105, 104536, 27214],
[ 4137, 3642, 5394, 27214, 114135]])
>>> matrix = c.matrix(sparse=True, balance=False)
>>> matrix
>>> matrix[:]
<3114x3114 sparse matrix of type ''
with 8220942 stored elements in COOrdinate format>
>>> c.matrix(balance=False, as_pixels=True, join=True)[1000:1005, 1000:1005]
chrom1 start1 end1 chrom2 start2 end2 count
0 chr5 115000000 116000000 chr5 115000000 116000000 120022
1 chr5 115000000 116000000 chr5 116000000 117000000 34107
2 chr5 115000000 116000000 chr5 117000000 118000000 17335
3 chr5 115000000 116000000 chr5 118000000 119000000 14053
4 chr5 115000000 116000000 chr5 119000000 120000000 4137
5 chr5 116000000 117000000 chr5 116000000 117000000 73396
6 chr5 116000000 117000000 chr5 117000000 118000000 47427
7 chr5 116000000 117000000 chr5 118000000 119000000 16125
8 chr5 116000000 117000000 chr5 119000000 120000000 3642
9 chr5 117000000 118000000 chr5 117000000 118000000 80458
10 chr5 117000000 118000000 chr5 118000000 119000000 25105
11 chr5 117000000 118000000 chr5 119000000 120000000 5394
12 chr5 118000000 119000000 chr5 118000000 119000000 104536
13 chr5 118000000 119000000 chr5 119000000 120000000 27214
14 chr5 119000000 120000000 chr5 119000000 120000000 114135
>>> A1 = c.matrix().fetch('chr1')
>>> A2 = c.matrix().fetch('chr3:10,000,000-20,000,000')
>>> A3 = c.matrix().fetch( ('chr3', 10000000, 20000000) )
>>> A4 = c.matrix().fetch('chr2', 'chr3')
>>> A5 = c.matrix().fetch('chr3:10M-20M', 'chr3:35M-40M')
Dask
~~~~
Dask data structures provide a way to manipulate and distribute computations on larger-than-memory data using familiar APIs.
The experimental ``read_table`` function can be used to generate a dask dataframe backed by the pixel table of a cooler as follows:
.. code-block:: python
>>> from cooler.sandbox.dask import read_table
>>> df = read_table(c.filename, 'pixels')
>>> df
Dask DataFrame Structure:
bin1_id bin2_id count
npartitions=223
0 int64 int64 int64
9999999 ... ... ...
... ... ... ...
2219999999 ... ... ...
2220472929 ... ... ...
Dask Name: daskify, 223 tasks
>>> df = cooler.annotate(df, c.bins(), replace=False)
>>> df
Dask DataFrame Structure:
chrom1 start1 end1 weight1 chrom2 start2 end2 weight2 bin1_id bin2_id count
npartitions=31
None object int64 int64 float64 object int64 int64 float64 int64 int64 int64
None ... ... ... ... ... ... ... ... ... ... ...
... ... ... ... ... ... ... ... ... ... ... ...
None ... ... ... ... ... ... ... ... ... ... ...
None ... ... ... ... ... ... ... ... ... ... ...
Dask Name: getitem, 125 tasks
>>> df = df[df.chrom1 == df.chrom2]
>>> grouped = df.groupby(df.bin2_id - df.bin1_id)
>>> x = grouped['count'].sum()
>>> x
Dask Series Structure:
npartitions=1
None int64
None ...
Name: count, dtype: int64
Dask Name: series-groupby-sum-agg, 378 tasks
>>> x.compute()
0 476155231
1 284724453
2 139952477
3 96520218
4 71962080
5 56085850
6 45176881
7 37274367
8 31328555
9 26781986
10 23212616
11 20366934
12 18066135
13 16159826
14 14584058
15 13249443
16 12117854
17 11149845
...
Learn more about the `Dask <https://dask.org>`_ project.
Create a scool file
-------------------
The creation of a single-cell cooler file is similar to a regular cooler file. Each cell needs to have a name, bin table and a pixel table.
All cells must have the same dimensions, and the bins and pixels need to be provided as two dicts with the cell names as keys.
.. code-block:: python
>>> name_pixel_dict = {'cell1': pixels_cell1, 'cell2': pixels_cell2, 'cell3': pixels_cell3}
>>> name_bins_dict = {'cell1': bins_cell1, 'cell2': bins_cell2, 'cell3': bins_cell3}
>>> cooler.create_scool('single_cell_cool.scool', name_bins_dict, name_pixel_dict)
To read the content, each individual cell must be handled as a regular cool file.
.. code-block:: python
>>> content_of_scool = cooler.fileops.list_coolers('single_cell_cool.scool')
['/', '/cells/cell1', '/cells/cell2', '/cells/cell3']
>>> c1 = cooler.Cooler('single_cell_cool.scool::cells/cell1')
>>> c2 = cooler.Cooler('single_cell_cool.scool::cells/cell2')
>>> c3 = cooler.Cooler('single_cell_cool.scool::cells/cell3')
cooler-0.9.3/docs/conf.py 0000664 0000000 0000000 00000022754 14477647226 0015272 0 ustar 00root root 0000000 0000000 #!/usr/bin/env python
#
# cooler documentation build configuration file, created by
# sphinx-quickstart on Sun Jan 17 11:53:23 2016.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import os
import re
import sys
from unittest import mock
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
# sys.path.insert(0, os.path.abspath('.'))
# sys.path.insert(0, os.path.abspath(".."))
# autodoc_mock_imports = [
# 'numpy',
# 'scipy',
# 'pandas',
# 'h5py',
# 'dask',
# 'cytoolz',
# ]
# Heavy runtime dependencies are stubbed out with Mock objects so that
# Sphinx autodoc can import the package on a docs build host where these
# libraries are not installed.
MOCK_MODULES = [
    "cytoolz",
    "dask",
    "dask.base",
    "dask.array",
    "dask.dataframe",
    "dask.dataframe.core",
    "dask.dataframe.utils",
    "h5py",
    "numpy",
    "pandas",
    "pandas.algos",
    "pandas.api",
    "pandas.api.types",
    "scipy",
    "scipy.sparse",
    "sparse",
]

# Register a distinct Mock instance for every module name before any
# cooler import is attempted.
sys.modules.update((name, mock.Mock()) for name in MOCK_MODULES)
# -- Project information -----------------------------------------------------
# General information about the project.
# Project name as it appears in generated documentation titles.
project = "cooler"
# Copyright notice rendered in the page footer.
copyright = "2016-2019, Nezar Abdennur"
# Default author used by builders such as LaTeX and man pages.
author = "Nezar Abdennur"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
def _get_version():
    """Extract the package version string from ``src/cooler/_version.py``.

    The file is read relative to the docs directory and the
    ``__version__ = "..."`` assignment is parsed with a regex, avoiding
    an import of the package itself (its heavy dependencies are mocked
    above).

    Returns
    -------
    str
        The version string, e.g. ``"0.9.3"``.

    Raises
    ------
    RuntimeError
        If no ``__version__`` assignment is found in the file.
    """
    init = os.path.join("..", "src", "cooler", "_version.py")
    with open(init) as fh:
        text = fh.read()
    match = re.search(
        r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', text, re.MULTILINE
    )
    if match is None:
        # Fail loudly with context instead of an opaque AttributeError
        # from calling .group(1) on None.
        raise RuntimeError(f"Unable to find __version__ in {init}")
    return match.group(1)


# The full version, including alpha/beta/rc tags.
release = _get_version()
# The short X.Y version.
version = release.rsplit(".", maxsplit=1)[0]
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
# Sphinx extensions enabled for this project: autodoc + autosummary for
# API docs, napoleon for numpy-style docstrings, recommonmark for
# Markdown sources.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.doctest",
    "sphinx.ext.todo",
    "sphinx.ext.coverage",
    "sphinx.ext.mathjax",
    "sphinx.ext.ifconfig",
    "sphinx.ext.viewcode",
    "sphinx.ext.autosummary",
    "sphinx.ext.napoleon",  # 'numpydoc'
    "recommonmark",
]
# Do not auto-list class members on class pages.
numpydoc_show_class_members = False
# Napoleon: fold the return type into the "Returns" description.
napoleon_use_rtype = False
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# The suffix(es) of source filenames.
# Multiple suffixes may be given as a mapping of file extension to
# parser name:
source_suffix = {
    ".rst": "restructuredtext",
    ".md": "markdown",
}
# source_parsers = {".md": "recommonmark.parser.CommonMarkParser"}
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []
# If true, keep warnings as "system message" paragraphs in the built documents.
# keep_warnings = False
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
# The document that holds the root toctree.
master_doc = "index"
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = "sphinx_rtd_theme"
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
# html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# " v documentation".
# html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
# html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
# html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
# html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# NOTE(review): the _static directory itself is not visible in this chunk —
# confirm it exists, otherwise Sphinx emits a warning.
html_static_path = ["_static"]
# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
# html_extra_path = []
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
# html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
# html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
# html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
# html_additional_pages = {}
# If false, no module index is generated.
# html_domain_indices = True
# If false, no index is generated.
# html_use_index = True
# If true, the index is split into individual pages for each letter.
# html_split_index = False
# If true, links to the reST sources are added to the pages.
# html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
# html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
# html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
# html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None
# Language to be used for generating the HTML full-text search index.
# Sphinx supports the following languages:
# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja'
# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr'
# html_search_language = 'en'
# A dictionary with options for the search language support, empty by default.
# Now only 'ja' uses this config value
# html_search_options = {'type': 'default'}
# The name of a javascript file (relative to the configuration directory) that
# implements a search results scorer. If empty, the default will be used.
# html_search_scorer = 'scorer.js'
# Output file base name for HTML help builder.
htmlhelp_basename = "coolerdoc"
# -- Options for LaTeX output ---------------------------------------------
# All LaTeX settings are left at their Sphinx defaults; the commented
# entries below document the available overrides.
latex_elements = {
    # # The paper size ('letterpaper' or 'a4paper').
    # 'papersize': 'letterpaper',
    # # The font size ('10pt', '11pt' or '12pt').
    # 'pointsize': '10pt',
    # # Additional stuff for the LaTeX preamble.
    # 'preamble': '',
    # # Latex figure (float) alignment
    # 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, "cooler.tex", "cooler Documentation", "Nezar Abdennur", "manual"),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
# latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
# latex_use_parts = False
# If true, show page references after internal links.
# latex_show_pagerefs = False
# If true, show URL addresses after external links.
# latex_show_urls = False
# Documents to append as an appendix to all manuals.
# latex_appendices = []
# If false, no module index is generated.
# latex_domain_indices = True
# -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
# Section 1 = general user commands.
man_pages = [(master_doc, "cooler", "cooler Documentation", [author], 1)]
# If true, show URL addresses after external links.
# man_show_urls = False
# -- Options for Texinfo output -------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
# NOTE(review): the description below is the sphinx-quickstart
# placeholder and was never filled in — consider replacing it.
texinfo_documents = [
    (
        master_doc,
        "cooler",
        "cooler Documentation",
        author,
        "cooler",
        "One line description of project.",
        "Miscellaneous",
    ),
]
# Documents to append as an appendix to all manuals.
# texinfo_appendices = []
# If false, no module index is generated.
# texinfo_domain_indices = True
# How to display URL addresses: 'footnote', 'no', or 'inline'.
# texinfo_show_urls = 'footnote'
# If true, do not generate a @detailmenu in the "Top" node's menu.
# texinfo_no_detailmenu = False
cooler-0.9.3/docs/cooler_logo.png 0000664 0000000 0000000 00000032030 14477647226 0016770 0 ustar 00root root 0000000 0000000 ‰PNG
IHDR – ïÝ“E pHYs .# .#x¥?v tEXtSoftware Adobe ImageReadyqÉe<