pax_global_header 0000666 0000000 0000000 00000000064 14173526301 0014514 g ustar 00root root 0000000 0000000 52 comment=45eddc08e64478ae0b52d1fe32c3a9529879c68b
pybedtools-0.9.0/ 0000775 0000000 0000000 00000000000 14173526301 0013706 5 ustar 00root root 0000000 0000000 pybedtools-0.9.0/.github/ 0000775 0000000 0000000 00000000000 14173526301 0015246 5 ustar 00root root 0000000 0000000 pybedtools-0.9.0/.github/workflows/ 0000775 0000000 0000000 00000000000 14173526301 0017303 5 ustar 00root root 0000000 0000000 pybedtools-0.9.0/.github/workflows/main.yml 0000664 0000000 0000000 00000010231 14173526301 0020747 0 ustar 00root root 0000000 0000000 name: main
on: [push]
jobs:
build-and-test:
  strategy:
    matrix:
      # Versions are quoted so YAML keeps them as strings. Unquoted, they are
      # parsed as floats, and a future "3.10" would silently become 3.1.
      python-version: ['3.6', '3.7', '3.8', '3.9']
  runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: git setup
  # Configure the committer identity used by the later `git commit`, and
  # publish BRANCH and WORKDIR to the following steps. Variables are shared
  # between steps by appending NAME=value lines to the file named by
  # $GITHUB_ENV rather than by exporting them from the shell.
  id: git-setup
  run: |
    git config --global user.email "action@github.com"
    git config --global user.name "GitHub Action"
    {
      echo "BRANCH=${GITHUB_REF##*/}"
      echo "WORKDIR=$(pwd)"
    } >> "$GITHUB_ENV"
- name: cythonize and pip
  # Convert .pyx files to .cpp and package into an sdist tarball, then check
  # that the tarball can be pip-installed and imported.
  #
  # This only requires Cython, no other dependencies.
  run: |
    eval "$(conda shell.bash hook)"
    conda create -p ./cython-env -y cython
    conda activate ./cython-env
    python setup.py clean cythonize sdist
    # The import check is run from a directory outside the checkout. TMPDIR
    # may be unset, in which case a bare `cd $TMPDIR` would silently change
    # to $HOME, so fall back to /tmp explicitly.
    (cd dist && pip install pybedtools-*.tar.gz && cd "${TMPDIR:-/tmp}" && python -c 'import pybedtools; print(pybedtools.__file__)')
    conda deactivate
- name: conda env and install locally
  # Create the test environment and install pybedtools into it from the sdist.
  #
  # NOTE: the tests need the compiled *.so files that only exist once the
  # package has been installed; without them the import fails with:
  #
  #   ModuleNotFoundError: No module named 'pybedtools.cbedtools'
  #
  # Installing from the source repo dir would also work, but could end up
  # relying on files that are in the repo yet missing from the sdist package.
  # So the sdist tarball is unpacked in a separate location and installed
  # from there.
  #
  # The test steps below operate in that unpacked directory.
  run: |
    eval "$(conda shell.bash hook)"
    conda create --yes --prefix ./test-env \
      --channel conda-forge \
      --channel bioconda \
      python=${{ matrix.python-version }} \
      --file requirements.txt \
      --file test-requirements.txt \
      --file optional-requirements.txt
    conda activate ./test-env
    mkdir -p /tmp/pybedtools-uncompressed
    tar -xf $WORKDIR/dist/pybedtools-*.tar.gz -C /tmp/pybedtools-uncompressed
    cd /tmp/pybedtools-uncompressed/pybedtools-*
    pip install -e .
    python -c 'import pybedtools; print(pybedtools.__file__)'
    ls *
- name: tests
  # Run the pytest suite (with module doctests) and the Sphinx doctests.
  run: |
    eval "$(conda shell.bash hook)"
    conda activate "$WORKDIR/test-env"
    # Work from the unpacked sdist directory created by the install step.
    cd /tmp/pybedtools-uncompressed/pybedtools-*
    pytest -v --doctest-modules
    pytest -v pybedtools/test/genomepy_integration.py
    # Bring the docs tree over from the checkout before running its doctests.
    cp -r $WORKDIR/docs .
    (cd docs && make clean doctest)
- name: build-docs
  # Build the HTML docs and commit them to a local clone of the gh-pages
  # branch in /tmp/docs. Nothing is pushed here; pushing is done by a separate
  # step.
  run: |
    eval "$(conda shell.bash hook)"
    conda activate ./test-env
    # Move to extracted tarball dir, see above notes
    cd /tmp/pybedtools-uncompressed/pybedtools-*
    (cd docs && make html)
    git clone \
      --single-branch \
      --branch gh-pages "https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY" \
      /tmp/docs
    # The clone URL, token included, is stored in /tmp/docs/.git/config.
    # /tmp/docs is uploaded as an artifact afterwards, so replace the remote
    # with a token-free URL to keep the credential out of the artifact.
    git -C /tmp/docs remote set-url origin "https://github.com/$GITHUB_REPOSITORY"
    rm -rf /tmp/docs/*
    cp -r docs/build/html/* /tmp/docs
    # Tell GitHub Pages not to run the site through Jekyll.
    touch /tmp/docs/.nojekyll
    cd /tmp/docs
    git add .
    # `git commit` fails when nothing is staged, so only commit on a change.
    if git diff --cached --quiet; then
      echo "no changes, nothing to commit"
    else
      git commit -m 'update docs'
    fi
    cd $WORKDIR
- name: docs artifact
  # Upload built docs as an artifact for inspection.
  #
  # upload-artifact v2 has been retired by GitHub. v4 refuses to upload two
  # artifacts with the same name within one run, so each matrix job uses a
  # name that includes its Python version.
  uses: actions/upload-artifact@v4
  with:
    name: docs-py${{ matrix.python-version }}
    path: /tmp/docs
- name: push docs to gh-pages branch
  # Publish the commit prepared in /tmp/docs. Only runs for the master branch
  # and for the Python 3.8 matrix entry.
  if: ${{ (github.ref == 'refs/heads/master') && (matrix.python-version == 3.8) }}
  run: |
    git -C /tmp/docs push "https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY" gh-pages
pybedtools-0.9.0/.gitignore 0000664 0000000 0000000 00000000434 14173526301 0015677 0 ustar 00root root 0000000 0000000 .tox*
*.coverage
virtual-env/*
dist/*
build/*
docs/_build/doctrees/*
docs/build
*.swp
docs/source/README.rst
pybedtools.egg-info/*
*.pyc
*.so
*.fai
*.egg
pybedtools/_Window.c
pybedtools/cbedtools.cpp
pybedtools/featurefuncs.cpp
*.bak
cythonize.dat
docs/source/autodocs/*.rst
MANIFEST
pybedtools-0.9.0/LICENSE.txt 0000664 0000000 0000000 00000002134 14173526301 0015531 0 ustar 00root root 0000000 0000000 Wrapper -- and more -- for BEDtools
Copyright (c) 2010-2022 Ryan Dale
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
pybedtools-0.9.0/MANIFEST.in 0000664 0000000 0000000 00000000535 14173526301 0015447 0 ustar 00root root 0000000 0000000 recursive-include pybedtools/include/ *
include README.rst
include LICENSE.txt
include ez_setup.py
recursive-include pybedtools/test/data *
recursive-include pybedtools/test *
include docs/Makefile
include docs/make.bat
recursive-include pybedtools *.cpp
recursive-include pybedtools *.c
recursive-exclude * __pycache__
recursive-exclude * *.py[co]
pybedtools-0.9.0/README.rst 0000664 0000000 0000000 00000005041 14173526301 0015375 0 ustar 00root root 0000000 0000000 Overview
--------
.. image:: https://travis-ci.org/daler/pybedtools.png?branch=master
:target: https://travis-ci.org/daler/pybedtools
.. image:: https://badge.fury.io/py/pybedtools.svg?style=flat
:target: http://badge.fury.io/py/pybedtools
.. image:: https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg
:target: http://bioconda.github.io
The `BEDTools suite of programs <http://bedtools.readthedocs.org/>`_ is widely
used for genomic interval manipulation or "genome algebra". `pybedtools` wraps
and extends BEDTools and offers feature-level manipulations from within
Python.
See full online documentation, including installation instructions, at
http://daler.github.io/pybedtools/.
Why `pybedtools`?
-----------------
Here is an example to get the names of genes that are <5 kb away from
intergenic SNPs:
.. code-block:: python
from pybedtools import BedTool
snps = BedTool('snps.bed.gz') # [1]
genes = BedTool('hg19.gff') # [1]
intergenic_snps = snps.subtract(genes) # [2]
nearby = genes.closest(intergenic_snps, d=True, stream=True) # [2, 3]
for gene in nearby: # [4]
if int(gene[-1]) < 5000: # [4]
print(gene.name) # [4]
Useful features shown here include:
* `[1]` support for all BEDTools-supported formats (here gzipped BED and GFF)
* `[2]` wrapping of all BEDTools programs and arguments (here, `subtract` and `closest` and passing
the `-d` flag to `closest`);
* `[3]` streaming results (like Unix pipes, here specified by `stream=True`)
* `[4]` iterating over results while accessing feature data by index or by attribute
access (here `[-1]` and `.name`).
In contrast, here is the same analysis using shell scripting. Note that this
requires knowledge in Perl, bash, and awk. The run time is identical to the
`pybedtools` version above:
.. code-block:: bash
snps=snps.bed.gz
genes=hg19.gff
intergenic_snps=/tmp/intergenic_snps
snp_fields=`zcat $snps | awk '(NR == 2){print NF; exit;}'`
gene_fields=9
distance_field=$(($gene_fields + $snp_fields + 1))
intersectBed -a $snps -b $genes -v > $intergenic_snps
closestBed -a $genes -b $intergenic_snps -d \
| awk '($'$distance_field' < 5000){print $9;}' \
| perl -ne 'm/[ID|Name|gene_id]=(.*?);/; print "$1\n"'
rm $intergenic_snps
See the `Shell script comparison <http://daler.github.io/pybedtools/sh-comparison.html>`_ in the docs
for more details on this comparison, or keep reading the full documentation at
http://daler.github.io/pybedtools.
pybedtools-0.9.0/build-docs.sh 0000775 0000000 0000000 00000001350 14173526301 0016271 0 ustar 00root root 0000000 0000000 #!/bin/bash
# Build docs here, then copy them over to a fresh, temporary checkout of the
# gh-pages branch from github. Then upload 'em. After a few minutes, you'll see
# the newly-generated docs at daler.github.io/pybedtools.
# Ideas from:
# http://executableopinions.readthedocs.org/en/latest/labs/gh-pages/gh-pages.html
set -e
set -x

# Build the HTML docs in the working tree.
(cd docs && make html)

HERE=$(pwd)
# The commit message carries the first line of `git log` for the source tree.
MSG="Adding gh-pages docs for $(git log --abbrev-commit | head -n1)"
DOCSOURCE="$HERE/docs/build/html"
TMPREPO=/tmp/docs

# Start from a fresh clone so stale files from an earlier run cannot leak in.
rm -rf "$TMPREPO"
mkdir -p -m 0755 "$TMPREPO"
git clone git@github.com:daler/pybedtools.git "$TMPREPO"
cd "$TMPREPO"
git checkout gh-pages
cp -r "$DOCSOURCE"/* "$TMPREPO"
touch "$TMPREPO/.nojekyll"
git add -A

# `git commit` exits non-zero when nothing is staged, which would abort the
# script under `set -e`. Skip the commit and push when the docs are unchanged.
if git diff --cached --quiet; then
    echo "no changes, nothing to commit"
else
    git commit -m "$MSG"
    git push origin gh-pages
fi
cd "$HERE"
pybedtools-0.9.0/dev-requirements.txt 0000664 0000000 0000000 00000000066 14173526301 0017750 0 ustar 00root root 0000000 0000000 cython
matplotlib
numpydoc
pandas
pyyaml
sphinx
pysam
pybedtools-0.9.0/docker/ 0000775 0000000 0000000 00000000000 14173526301 0015155 5 ustar 00root root 0000000 0000000 pybedtools-0.9.0/docker/full-test.sh 0000775 0000000 0000000 00000000434 14173526301 0017434 0 ustar 00root root 0000000 0000000 #!/bin/bash
set -e
set -x

# Build the configured containers and run tests in each.
#
# Each name is both the image tag and the directory holding its Dockerfile,
# and the parent directory is mounted into the container, so this script is
# expected to be run from the docker/ directory.
containers=(pbt-test-py2 pbt-test-py3)

for container in "${containers[@]}"; do
    docker build -t "$container" "$container"
    # The volume argument is quoted so a checkout path containing spaces is
    # passed to docker as a single argument.
    docker run -it -v "$(pwd)/..:/opt/pybedtools" "$container" docker/harness.sh
done
pybedtools-0.9.0/docker/harness.sh 0000775 0000000 0000000 00000001230 14173526301 0017153 0 ustar 00root root 0000000 0000000 #!/bin/bash
set -e
set -x

# Use Agg backend for matplotlib, which avoids X server errors.
# The active matplotlibrc is copied into the user config directory and its
# Qt4Agg backend setting is rewritten to Agg.
mplrc=$(python -c 'from matplotlib import matplotlib_fname as mf; print(mf())')
mkdir -p ~/.config/matplotlib
# Quoted so a path containing whitespace is passed as one argument.
cp "$mplrc" ~/.config/matplotlib
sed -i "s/: Qt4Agg/: Agg/g" ~/.config/matplotlib/matplotlibrc

# The repo should have been exported to the container as /opt/pybedtools.
#
# Since docker runs as root, and we want to keep the exported data intact, we
# make a copy and do a completely clean installation on that copy before
# running tests.
cd ~
cp -r /opt/pybedtools .
cd pybedtools
python setup.py clean
python setup.py develop
nosetests
(cd docs && make doctest)
pybedtools-0.9.0/docker/pbt-test-py2/ 0000775 0000000 0000000 00000000000 14173526301 0017427 5 ustar 00root root 0000000 0000000 pybedtools-0.9.0/docker/pbt-test-py2/Dockerfile 0000664 0000000 0000000 00000001630 14173526301 0021421 0 ustar 00root root 0000000 0000000 FROM ubuntu:14.04
MAINTAINER Ryan Dale

# System packages needed to download Miniconda and build the extension
# modules. The apt package lists are removed in the same layer so they do not
# stay in the image.
RUN apt-get update && apt-get install -y \
    build-essential \
    bzip2 \
    ca-certificates \
    git \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    mysql-client \
    wget \
    zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install Miniconda (Python 2 flavour) into /opt/conda and pin conda itself.
RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh && \
    wget --quiet https://repo.continuum.io/miniconda/Miniconda-3.10.1-Linux-x86_64.sh && \
    /bin/bash /Miniconda-3.10.1-Linux-x86_64.sh -b -p /opt/conda && \
    rm Miniconda-3.10.1-Linux-x86_64.sh && \
    /opt/conda/bin/conda install --yes conda==3.14.1

ENV PATH=/opt/conda/bin:$PATH

# Python dependencies. --yes makes the install explicitly non-interactive, so
# the build does not depend on how conda handles its confirmation prompt when
# no terminal is attached. The duplicate `pip` entry has been removed.
RUN conda install --yes -c daler \
    pip \
    cython \
    matplotlib \
    nose \
    numpydoc \
    pandas \
    pyyaml \
    sphinx \
    pysam

# Command-line tools used by the tests.
RUN conda install --yes -c daler \
    tabix \
    bedtools=2.25.0

ENV DISPLAY=:0
ENV LANG=C.UTF-8
WORKDIR /opt/pybedtools
pybedtools-0.9.0/docker/pbt-test-py3/ 0000775 0000000 0000000 00000000000 14173526301 0017430 5 ustar 00root root 0000000 0000000 pybedtools-0.9.0/docker/pbt-test-py3/Dockerfile 0000664 0000000 0000000 00000001633 14173526301 0021425 0 ustar 00root root 0000000 0000000 FROM ubuntu:14.04
MAINTAINER Ryan Dale

# System packages needed to download Miniconda and build the extension
# modules. The apt package lists are removed in the same layer so they do not
# stay in the image.
RUN apt-get update && apt-get install -y \
    build-essential \
    bzip2 \
    ca-certificates \
    git \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    mysql-client \
    wget \
    zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install Miniconda3 (Python 3 flavour) into /opt/conda and pin conda itself.
RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh && \
    wget --quiet https://repo.continuum.io/miniconda/Miniconda3-3.10.1-Linux-x86_64.sh && \
    /bin/bash /Miniconda3-3.10.1-Linux-x86_64.sh -b -p /opt/conda && \
    rm Miniconda3-3.10.1-Linux-x86_64.sh && \
    /opt/conda/bin/conda install --yes conda==3.14.1

ENV PATH=/opt/conda/bin:$PATH

# Python dependencies. --yes makes the install explicitly non-interactive, so
# the build does not depend on how conda handles its confirmation prompt when
# no terminal is attached. The duplicate `pip` entry has been removed.
RUN conda install --yes -c daler \
    pip \
    cython \
    matplotlib \
    nose \
    numpydoc \
    pandas \
    pyyaml \
    sphinx \
    pysam

# Command-line tools used by the tests.
RUN conda install --yes -c daler \
    tabix \
    bedtools=2.25.0

ENV DISPLAY=:0
ENV LANG=C.UTF-8
WORKDIR /opt/pybedtools
pybedtools-0.9.0/docs/ 0000775 0000000 0000000 00000000000 14173526301 0014636 5 ustar 00root root 0000000 0000000 pybedtools-0.9.0/docs/Makefile 0000664 0000000 0000000 00000011360 14173526301 0016277 0 ustar 00root root 0000000 0000000 # Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
#BUILDDIR = ../../pybedtools-docs
BUILDDIR = build
PDF = build/html/pybedtools_manual.pdf
PDFBUILDDIR = /tmp/doc-pybedtools

# Put the parent directory on the Python path for the commands run by the
# recipes. Make variables must be referenced as $(NAME): a bare $PYTHONPATH is
# read by make as $(P) followed by the literal text "YTHONPATH". The variable
# is also exported so that it reaches the recipe processes, and the ":"
# separator is only emitted when PYTHONPATH already has a value.
export PYTHONPATH := $(if $(PYTHONPATH),$(PYTHONPATH):)..

# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest
# Print the list of supported targets with a one-line description of each.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo " html to make standalone HTML files"
	@echo " dirhtml to make HTML files named index.html in directories"
	@echo " singlehtml to make a single large HTML file"
	@echo " pickle to make pickle files"
	@echo " json to make JSON files"
	@echo " htmlhelp to make HTML files and a HTML help project"
	@echo " qthelp to make HTML files and a qthelp project"
	@echo " devhelp to make HTML files and a Devhelp project"
	@echo " epub to make an epub"
	@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo " latexpdf to make LaTeX files and run them through pdflatex"
	@echo " text to make text files"
	@echo " man to make manual pages"
	@echo " changes to make an overview of all changed/added/deprecated items"
	@echo " linkcheck to check all external links for integrity"
	@echo " doctest to run all doctests embedded in the documentation (if enabled)"
# Remove everything under the build directory plus the generated autodocs
# .rst files. The leading "-" makes make ignore a failure of rm.
clean:
-rm -rf $(BUILDDIR)/* source/autodocs/*.rst
# Standalone HTML build; also drops a .nojekyll marker into the output.
html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
touch $(BUILDDIR)/html/.nojekyll
# Sphinx "dirhtml" builder.
dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
# Sphinx "singlehtml" builder.
singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
# Sphinx "pickle" builder.
pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."
# Sphinx "json" builder.
json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."
# Sphinx "htmlhelp" builder.
htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
# Sphinx "qthelp" builder; prints the follow-up Qt commands.
qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pybedtools.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pybedtools.qhc"
# Sphinx "devhelp" builder. $$HOME is escaped so that the shell, not make,
# sees $HOME in the echoed instructions.
devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/pybedtools"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pybedtools"
@echo "# devhelp"
# Sphinx "epub" builder.
epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
# Sphinx "latex" builder; only generates the LaTeX sources.
latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
# Generate the LaTeX sources in $(PDFBUILDDIR), run them through pdflatex and
# copy the resulting PDF to $(PDF).
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(PDFBUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	# $(MAKE) rather than a literal "make" so that command-line flags and the
	# jobserver are passed on to the sub-make.
	$(MAKE) -C $(PDFBUILDDIR)/latex all-pdf
	# The destination directory only exists after an HTML build; create it so
	# this target also works on a clean tree.
	mkdir -p $(dir $(PDF))
	cp $(PDFBUILDDIR)/latex/*.pdf $(PDF)
	@echo "pdflatex finished; see PDF files in $(PDF)"
# Sphinx "text" builder.
text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."
# Sphinx "man" builder.
man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
# Sphinx "changes" builder.
changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."
# Sphinx "linkcheck" builder.
linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
# Sphinx "doctest" builder; this is the target invoked by the CI workflow and
# by docker/harness.sh.
doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."
pybedtools-0.9.0/docs/README.rst 0000664 0000000 0000000 00000000107 14173526301 0016323 0 ustar 00root root 0000000 0000000 Compiled HTML docs can be found at http://pythonhosted.org/pybedtools/
pybedtools-0.9.0/docs/make.bat 0000664 0000000 0000000 00000010026 14173526301 0016242 0 ustar 00root root 0000000 0000000 @ECHO OFF
REM Command file for Sphinx documentation
REM Default the Sphinx executable unless the caller already set SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
REM Output directory and the options shared by every builder invocation.
set BUILDDIR=build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
REM When PAPER is set, pass it to Sphinx as the LaTeX paper size.
if NOT "%PAPER%" == "" (
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
)
REM With no argument, jump straight to the usage text below.
if "%1" == "" goto help

REM Print the list of supported targets. The angle brackets around "target"
REM are escaped with ^ so cmd does not treat them as redirections.
if "%1" == "help" (
    :help
    echo.Please use `make ^<target^>` where ^<target^> is one of
    echo. html to make standalone HTML files
    echo. dirhtml to make HTML files named index.html in directories
    echo. singlehtml to make a single large HTML file
    echo. pickle to make pickle files
    echo. json to make JSON files
    echo. htmlhelp to make HTML files and a HTML help project
    echo. qthelp to make HTML files and a qthelp project
    echo. devhelp to make HTML files and a Devhelp project
    echo. epub to make an epub
    echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
    echo. text to make text files
    echo. man to make manual pages
    echo. changes to make an overview over all changed/added/deprecated items
    echo. linkcheck to check all external links for integrity
    echo. doctest to run all doctests embedded in the documentation if enabled
    goto end
)
REM clean: remove every subdirectory of the build directory, then the
REM remaining files.
if "%1" == "clean" (
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
del /q /s %BUILDDIR%\*
goto end
)
REM html: Sphinx "html" builder.
if "%1" == "html" (
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
goto end
)
REM dirhtml: Sphinx "dirhtml" builder.
if "%1" == "dirhtml" (
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
goto end
)
REM singlehtml: Sphinx "singlehtml" builder.
if "%1" == "singlehtml" (
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
goto end
)
REM pickle: Sphinx "pickle" builder.
if "%1" == "pickle" (
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
echo.
echo.Build finished; now you can process the pickle files.
goto end
)
REM json: Sphinx "json" builder.
if "%1" == "json" (
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
echo.
echo.Build finished; now you can process the JSON files.
goto end
)
REM htmlhelp: Sphinx "htmlhelp" builder.
if "%1" == "htmlhelp" (
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
echo.
echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
goto end
)
REM qthelp: Sphinx "qthelp" builder, followed by the Qt commands to run next.
REM The collection file extension is .qhc, matching the Makefile's qthelp
REM target; the previous ".ghc" was a typo.
if "%1" == "qthelp" (
    %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
    echo.
    echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
    echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pybedtools.qhcp
    echo.To view the help file:
    echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pybedtools.qhc
    goto end
)
REM devhelp: Sphinx "devhelp" builder.
if "%1" == "devhelp" (
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
echo.
echo.Build finished.
goto end
)
REM epub: Sphinx "epub" builder.
if "%1" == "epub" (
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
echo.
echo.Build finished. The epub file is in %BUILDDIR%/epub.
goto end
)
REM latex: Sphinx "latex" builder; only generates the LaTeX sources.
if "%1" == "latex" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
echo.
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
goto end
)
REM text: Sphinx "text" builder.
if "%1" == "text" (
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
echo.
echo.Build finished. The text files are in %BUILDDIR%/text.
goto end
)
REM man: Sphinx "man" builder.
if "%1" == "man" (
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
echo.
echo.Build finished. The manual pages are in %BUILDDIR%/man.
goto end
)
REM changes: Sphinx "changes" builder.
if "%1" == "changes" (
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
echo.
echo.The overview file is in %BUILDDIR%/changes.
goto end
)
REM linkcheck: Sphinx "linkcheck" builder.
if "%1" == "linkcheck" (
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
echo.
echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
goto end
)
REM doctest: Sphinx "doctest" builder.
if "%1" == "doctest" (
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
echo.
echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
goto end
)
:end
pybedtools-0.9.0/docs/source/ 0000775 0000000 0000000 00000000000 14173526301 0016136 5 ustar 00root root 0000000 0000000 pybedtools-0.9.0/docs/source/3-brief-examples.rst 0000664 0000000 0000000 00000006742 14173526301 0021744 0 ustar 00root root 0000000 0000000
.. _BEDTools: http://github.com/arq5x/bedtools
.. _3examples:
Three brief examples
--------------------
Here are three examples to show typical usage of :mod:`pybedtools`. More
info can be found in the docstrings of :mod:`pybedtools` methods and in the
:ref:`tutorial`.
You can also check out :ref:`shell_comparison` for a simple
example of how :mod:`pybedtools` can improve readability of your code with no
loss of speed compared to bash scripting.
.. note::
Please take the time to read and understand the conventions
:mod:`pybedtools` uses to handle files with different coordinate systems
(e.g., 0-based BED files vs 1-based GFF files) which are described
:ref:`here <zero_based_coords>`.
In summary,
* **Integer** values representing start/stop are *always in 0-based
coordinates*, regardless of file format. This means that all
:class:`Interval` objects can be treated identically, and greatly
simplifies underlying code.
* **String** values representing start/stop will use coordinates appropriate
for the format (1-based for GFF; 0-based for BED).
Example 1: Save a BED file of intersections, with track line
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This example saves a new BED file of intersections between your files `mydata/snps.bed` and
`mydata/exons.bed`, adding a track line to the output::
>>> import pybedtools
>>> a = pybedtools.BedTool('mydata/snps.bed')
>>> a.intersect('mydata/exons.bed').saveas('snps-in-exons.bed', trackline="track name='SNPs in exons' color=128,0,0")
Example 2: Intersections for a 3-way Venn diagram
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This example gets values for a 3-way Venn diagram of overlaps. This
demonstrates operator overloading of :class:`BedTool` objects. It assumes that
you have the files `a.bed`, `b.bed`, and `c.bed` in your current working
directory. If you'd like to use example files that come with
:mod:`pybedtools`, then replace strings like `'a.bed'` with
`pybedtools.example_filename('a.bed')`, which will retrieve the absolute path
to the example data file::
>>> import pybedtools
>>> # set up 3 different bedtools
>>> a = pybedtools.BedTool('a.bed')
>>> b = pybedtools.BedTool('b.bed')
>>> c = pybedtools.BedTool('c.bed')
>>> (a-b-c).count() # unique to a
>>> (a+b-c).count() # in a and b, not c
>>> (a+b+c).count() # common to all
>>> # ... and so on, for all the combinations.
For more, see the :mod:`pybedtools.scripts.venn_mpl` and
:mod:`pybedtools.scripts.venn_gchart` scripts, which wrap this functionality in
command-line scripts to create Venn diagrams using either matplotlib or Google
Charts API respectively. Also see the :mod:`pybedtools.contrib.venn_maker`
module for a flexible interface to the VennDiagram `R` package.
.. _third example:
Example 3: Count reads in introns and exons, in parallel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This example shows how to count the number of reads in introns and exons in
parallel. It is somewhat more involved, but illustrates several additional
features of :mod:`pybedtools` such as:
* BAM file support (for more, see :ref:`bam`)
* indexing into Interval objects (for more, see :ref:`intervals`)
* filtering (for more, see :ref:`filtering`)
* streaming (for more, see :ref:`BedTools as iterators`)
* ability to use parallel processing
.. literalinclude:: example_3
For more on using :mod:`pybedtools`, continue on to the :ref:`tutorial` . . .
pybedtools-0.9.0/docs/source/FAQs.rst 0000664 0000000 0000000 00000014121 14173526301 0017461 0 ustar 00root root 0000000 0000000 .. include:: includeme.rst
FAQs
====
.. note::
More detailed answers to these questions can often be found on the `Issues
<https://github.com/daler/pybedtools/issues/>`_ page.
"Does pybedtools have a simple reader/writer for BED files?"
------------------------------------------------------------
While `pybedtools` is designed to work with BEDTools, its reading/writing/parsing
functionality can easily be used for other things.
Simply iterating over a :class:`BedTool` object will parse each line into
a :class:`Interval` object. You can then manipulate this or access the fields
as needed.
For example::
x = pybedtools.example_bedtool('a.bed')
for interval in x:
# do something with interval
However, if you're planning on writing the results out to file, it may be more
useful to write a transformation function along with the :meth:`BedTool.each`
method. This allows you to read, transform, and write all in one command::
def my_func(f):
"""
adds 1 bp to the stop
"""
f.stop += 1
return f
pybedtools.example_bedtool('a.bed')\
.each(my_func)\
.saveas('out.bed')
Another useful idiom is creating a generator function. For example, here we
change the name field to reflect the value of a counter. We create a BedTool
from the iterator and then save it::
def gen():
counter = 0
for i in pybedtools.example_bedtool('a.bed'):
i.name = str(counter)
counter += 1
yield i
pybedtools.BedTool(gen()).saveas('counted.bed')
See :ref:`saveresults` for more on saving the results.
"Can I create a BedTool object from an existing list?"
------------------------------------------------------
Sure, the :class:`BedTool` constructor will figure it out::
items = [
('chr1', 100, 200),
('chr1', 500, 600),
]
x = pybedtools.BedTool(items)
"I'm getting an empty BedTool"
------------------------------
Check to make sure you're not consuming a BedTool generator. Note that
:meth:`BedTool.filter` and :meth:`BedTool.each` will return a generator BedTool
object. Keep in mind that checking the length of a generator BedTool will
completely consume it.
It's probably best to save intermediate versions to file using
:meth:`BedTool.saveas`. If you don't provide a filename, it'll save to an
automatically cleaned up tempfile::
my_bedtool\
.filter(my_filter_func)\
.saveas()\
.intersect(y)\
.filter(lambda x: len(x) > 1000)\
.saveas('filtered-intersected-large.bed')
"I'm getting a MalformedBedLineError"
-------------------------------------
This error can be raised by BEDTools itself. Typical reasons are that start
> end, or the fields are not tab-delimited.
You can try the :func:`pybedtools.remove_invalid` function to clean up your
file, or manually edit the offending lines.
"I get a segfault when iterating over a BedTool object"
-------------------------------------------------------
See `Issue #88 <https://github.com/daler/pybedtools/issues/88>`_, which
addresses this issue -- in summary, Cython's handling of iterators works
unexpectedly. It's best to call the `next()` method explicitly when doing
complex manipulations on an iterating :class:`BedTool`.
"Can I add extra information to FASTA headers when using BedTool.sequence()?"
-----------------------------------------------------------------------------
Since BEDTools adds the feature name to the FASTA header, you can manipulate
the feature name on the fly with a custom modifier function::
def fields2name(f):
"replace GFF featuretype field with the attributes field"
f[2] = f[-1]
return f
import pybedtools
g = pybedtools.BedTool("my.gff").each(fields2name).sequence(fi='my.fasta')
print(open(g.seqfn).readline())
"Too many files open" error
---------------------------
Sometimes you may get the error::
* Too many files open -- please submit a bug report so that this can be fixed
This error occurs because you have hit your operating system's limit on the
number of open files. This usually happens when creating many :class:`BedTool`
objects, often within a for-loop.
In general, **try to create as few** :class:`BedTool` **objects as you can**. Every time you
create a :class:`BedTool` object, you create a new open file. There is usually
a BEDTools program that already does what you want, and will do it faster.
For example, say we want to:
* start with all annotations
* only consider exons
* write a file containing just exons
* count reads in multiple BAM files for each exon
Here is a first draft. Note that the for-loop creates a :class:`BedTool`
object each iteration, and the `result` is yet another :class:`BedTool`. This
version will raise the "Too many files open" error.
.. code-block:: python
# This version will be slow and, with many exons, will raise the "Too many
# files open" error
import pybedtools
all_features = pybedtools.BedTool('annotations.gff')
fout = open('exons.gff', 'w')
for feature in all_features:
if feature[2] != 'exon':
continue
fout.write(str(feature))
bt = pybedtools.BedTool([feature])
result = bt.multi_bam_coverage(bams=['reads1.bam', 'reads2.bam'])
# ...do something with result
fout.close()
In contrast, it would be better to construct an "exon-only" :class:`BedTool` at
the beginning. The :meth:`BedTool.filter` method is a good way to do this.
Then, there is only one call to :meth:`BedTool.multi_bam_coverage`.
In this version there are only 3 :class:`BedTool` objects: the
one that opens `annotations.gff`, the one that uses `exons.gff` after it is
saved, and `result`. (Note that the one created from the filter operation is
a "streaming" BedTool, so there is no open file that will contribute to the
total).
.. code-block:: python
# This is the recommended way.
import pybedtools
exons = pybedtools.BedTool('annotations.gff')\
.filter(lambda x: x[2] == 'exon')\
.saveas('exons.gff')
result = exons.multi_bam_coverage(bams=['reads1.bam', 'reads2.bam'])
# ...do something with result
pybedtools-0.9.0/docs/source/_static/ 0000775 0000000 0000000 00000000000 14173526301 0017564 5 ustar 00root root 0000000 0000000 pybedtools-0.9.0/docs/source/_static/custom.css 0000664 0000000 0000000 00000000060 14173526301 0021604 0 ustar 00root root 0000000 0000000 div.highlight-python pre {
font-size: 0.7em;
}
pybedtools-0.9.0/docs/source/_templates/ 0000775 0000000 0000000 00000000000 14173526301 0020273 5 ustar 00root root 0000000 0000000 pybedtools-0.9.0/docs/source/_templates/layout.html 0000664 0000000 0000000 00000000121 14173526301 0022470 0 ustar 00root root 0000000 0000000 {% extends '!layout.html' %}
{% block relbar2 %}
{{ relbar() }}
{% endblock %}
pybedtools-0.9.0/docs/source/autodoc_source.rst 0000664 0000000 0000000 00000025116 14173526301 0021713 0 ustar 00root root 0000000 0000000
.. _autodoc:
.. _pybedtools reference:
.. currentmodule:: pybedtools
:mod:`pybedtools` Reference
===========================
The following tables summarize the methods and functions; click on a method or
function name to see the complete documentation.
.. contents::
:class:`BedTool` creation
-------------------------
The main :class:`BedTool` documentation, with a list of all methods in
alphabetical order at the bottom. For more details, please see :ref:`creating
a BedTool`.
.. autosummary::
:toctree: autodocs
pybedtools.bedtool.BedTool
`BEDTools` wrappers
-------------------
These methods wrap `BEDTools` programs for easy use with Python; you can then
use the other :mod:`pybedtools` functionality for further manipulation and
analysis.
The documentation of each of these methods starts with
:mod:`pybedtools`-specific documentation, possibly followed by an example.
Finally, the `BEDTools` help is copied verbatim from whatever version was
installed when generating these docs.
In general the `BEDTools` wrapper methods adhere to the :ref:`Design principles`:
* :ref:`temp principle`
* :ref:`similarity principle`
* :ref:`version principle`
* :ref:`default args principle`
.. autosummary::
:toctree: autodocs
pybedtools.bedtool.BedTool.intersect
pybedtools.bedtool.BedTool.window
pybedtools.bedtool.BedTool.closest
pybedtools.bedtool.BedTool.coverage
pybedtools.bedtool.BedTool.map
pybedtools.bedtool.BedTool.genome_coverage
pybedtools.bedtool.BedTool.merge
pybedtools.bedtool.BedTool.cluster
pybedtools.bedtool.BedTool.complement
pybedtools.bedtool.BedTool.subtract
pybedtools.bedtool.BedTool.slop
pybedtools.bedtool.BedTool.flank
pybedtools.bedtool.BedTool.sort
pybedtools.bedtool.BedTool.random
pybedtools.bedtool.BedTool.shuffle
pybedtools.bedtool.BedTool.annotate
pybedtools.bedtool.BedTool.multi_intersect
pybedtools.bedtool.BedTool.union_bedgraphs
pybedtools.bedtool.BedTool.pair_to_bed
pybedtools.bedtool.BedTool.pair_to_pair
pybedtools.bedtool.BedTool.bam_to_bed
pybedtools.bedtool.BedTool.to_bam
pybedtools.bedtool.BedTool.bedpe_to_bam
pybedtools.bedtool.BedTool.bed6
pybedtools.bedtool.BedTool.bam_to_fastq
pybedtools.bedtool.BedTool.sequence
pybedtools.bedtool.BedTool.mask_fasta
pybedtools.bedtool.BedTool.nucleotide_content
pybedtools.bedtool.BedTool.multi_bam_coverage
pybedtools.bedtool.BedTool.tag_bam
pybedtools.bedtool.BedTool.jaccard
pybedtools.bedtool.BedTool.reldist
pybedtools.bedtool.BedTool.overlap
pybedtools.bedtool.BedTool.links
pybedtools.bedtool.BedTool.igv
pybedtools.bedtool.BedTool.window_maker
pybedtools.bedtool.BedTool.groupby
pybedtools.bedtool.BedTool.expand
Other :class:`BedTool` methods
------------------------------
These methods are some of the ways in which :mod:`pybedtools` extends the
BEDTools suite.
Feature-by-feature operations
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Methods that operate on a feature-by-feature basis to modify or filter features
on the fly.
.. autosummary::
:toctree: autodocs
pybedtools.bedtool.BedTool.each
pybedtools.bedtool.BedTool.filter
pybedtools.bedtool.BedTool.split
pybedtools.bedtool.BedTool.truncate_to_chrom
pybedtools.bedtool.BedTool.remove_invalid
The :mod:`pybedtools.featurefuncs` module contains some commonly-used functions
that can be passed to :meth:`BedTool.each`:
.. currentmodule:: pybedtools
.. autosummary::
:toctree:
pybedtools.featurefuncs.three_prime
pybedtools.featurefuncs.five_prime
pybedtools.featurefuncs.TSS
pybedtools.featurefuncs.extend_fields
pybedtools.featurefuncs.center
pybedtools.featurefuncs.midpoint
pybedtools.featurefuncs.normalized_to_length
pybedtools.featurefuncs.rename
pybedtools.featurefuncs.greater_than
pybedtools.featurefuncs.less_than
pybedtools.featurefuncs.normalized_to_length
pybedtools.featurefuncs.rename
pybedtools.featurefuncs.bedgraph_scale
pybedtools.featurefuncs.add_color
pybedtools.featurefuncs.gff2bed
pybedtools.featurefuncs.bed2gff
Searching for features
~~~~~~~~~~~~~~~~~~~~~~
These methods take a single interval as input and return the intervals of the
BedTool that overlap.
This can be useful when searching across many BED files for a particular
coordinate range -- for example, they can be used to identify all binding sites,
stored in many different BED files, that fall within a gene's coordinates.
.. autosummary::
:toctree: autodocs
pybedtools.bedtool.BedTool.all_hits
pybedtools.bedtool.BedTool.any_hits
pybedtools.bedtool.BedTool.count_hits
pybedtools.bedtool.BedTool.tabix_intervals
pybedtools.bedtool.BedTool.tabix
pybedtools.bedtool.BedTool.bgzip
:class:`BedTool` introspection
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
These methods provide information on the :class:`BedTool` object.
If using :meth:`BedTool.head`, don't forget that you can index into
:class:`BedTool` objects, too.
.. autosummary::
:toctree: autodocs
pybedtools.bedtool.BedTool.head
pybedtools.bedtool.BedTool.count
pybedtools.bedtool.BedTool.field_count
pybedtools.bedtool.BedTool.file_type
Randomization helpers
~~~~~~~~~~~~~~~~~~~~~
Helper methods useful for assessing empirical intersection
distributions between interval files.
.. autosummary::
:toctree: autodocs
pybedtools.bedtool.BedTool.parallel_apply
pybedtools.bedtool.BedTool.randomstats
pybedtools.bedtool.BedTool.randomintersection
pybedtools.bedtool.BedTool.randomintersection_bp
pybedtools.bedtool.BedTool.random_subset
pybedtools.bedtool.BedTool.random_jaccard
pybedtools.bedtool.BedTool.random_op
Managing :class:`BedTool` objects on disk
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
These methods are used to specify where to save results from :class:`BedTool`
operations.
.. autosummary::
:toctree: autodocs
pybedtools.bedtool.BedTool.saveas
pybedtools.bedtool.BedTool.moveto
Misc operations
~~~~~~~~~~~~~~~
Methods that can't quite be categorized into the above sections.
.. autosummary::
:toctree: autodocs
pybedtools.bedtool.BedTool.cat
pybedtools.bedtool.BedTool.at
pybedtools.bedtool.BedTool.absolute_distance
pybedtools.bedtool.BedTool.cut
pybedtools.bedtool.BedTool.total_coverage
pybedtools.bedtool.BedTool.with_attrs
pybedtools.bedtool.BedTool.as_intervalfile
pybedtools.bedtool.BedTool.introns
pybedtools.bedtool.BedTool.set_chromsizes
pybedtools.bedtool.BedTool.print_sequence
pybedtools.bedtool.BedTool.save_seqs
pybedtools.bedtool.BedTool.seq
pybedtools.bedtool.BedTool.liftover
pybedtools.bedtool.BedTool.colormap_normalize
pybedtools.bedtool.BedTool.relative_distance
Module-level functions
----------------------
Working with example files
~~~~~~~~~~~~~~~~~~~~~~~~~~
:mod:`pybedtools` comes with many example files. Here are some useful
functions for accessing them.
.. autosummary::
:toctree: autodocs
pybedtools.bedtool.example_bedtool
pybedtools.filenames.list_example_files
pybedtools.filenames.example_filename
Creating :class:`Interval` objects from scratch
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
:class:`Interval` objects are the core object in :mod:`pybedtools` to represent
a genomic interval, written in Cython for speed.
.. autosummary::
:toctree: autodocs
pybedtools.cbedtools.Interval
pybedtools.cbedtools.create_interval_from_list
:mod:`pybedtools` setup and config
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Use these functions right after importing in order to use custom paths or to
clean up the temp directory.
.. autosummary::
:toctree: autodocs
pybedtools.helpers.set_bedtools_path
pybedtools.helpers.get_tempdir
pybedtools.helpers.set_tempdir
pybedtools.helpers.cleanup
pybedtools.debug_mode
Working with "chromsizes" or assembly coordinate files
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Many `BEDTools` programs need "genome files" or "chromsizes" files so as to
remain within the coordinates of the assembly you're working on. These
functions help manage these files.
.. autosummary::
:toctree: autodocs
pybedtools.helpers.get_chromsizes_from_ucsc
pybedtools.helpers.chromsizes
pybedtools.helpers.chromsizes_to_file
Performing operations in parallel (multiprocessing)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
:toctree: autodocs
pybedtools.parallel.parallel_apply
:mod:`pybedtools.contrib`
-------------------------
The :mod:`pybedtools.contrib` module contains higher-level code that leverages
:class:`BedTool` objects for common analyses.
Plotting
~~~~~~~~
Plotting results from BEDTools/pybedtools operations is very useful for
exploring and understanding the tools as well as for teaching purposes.
.. autosummary::
:toctree: autodocs
pybedtools.contrib.plotting.Track
pybedtools.contrib.plotting.TrackCollection
pybedtools.contrib.plotting.binary_heatmap
pybedtools.contrib.plotting.binary_summary
pybedtools.contrib.plotting.BedToolsDemo
pybedtools.contrib.plotting.ConfiguredBedToolsDemo
Working with bigWig files
~~~~~~~~~~~~~~~~~~~~~~~~~
At this time, :mod:`pybedtools` does not support reading bigWig files, only
creating them via UCSC utilities.
.. autosummary::
:toctree: autodocs
pybedtools.contrib.bigwig.bam_to_bigwig
pybedtools.contrib.bigwig.bedgraph_to_bigwig
pybedtools.contrib.bigwig.wig_to_bigwig
Working with bigBed files
~~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
:toctree: autodocs
pybedtools.contrib.bigbed.bigbed
pybedtools.contrib.bigbed.bigbed_to_bed
:class:`IntersectionMatrix`
~~~~~~~~~~~~~~~~~~~~~~~~~~~
The :class:`IntersectionMatrix` class makes it easy to intersect a large number
of interval files with each other.
.. autosummary::
:toctree: autodocs
pybedtools.contrib.IntersectionMatrix
:mod:`contrib.venn_maker`
~~~~~~~~~~~~~~~~~~~~~~~~~
The :mod:`venn_maker` module helps you make Venn diagrams using the R package
`VennDiagram `_.
Note that Venn diagrams are not good for when you have nested intersections.
See the docs for :func:`pybedtools.contrib.venn_maker.cleaned_intersect` and
its source for more details.
.. autosummary::
:toctree: autodocs
pybedtools.contrib.venn_maker
pybedtools.contrib.venn_maker.venn_maker
pybedtools.contrib.venn_maker.cleaned_intersect
:mod:`contrib.long_range_interaction`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
:toctree: autodocs
pybedtools.contrib.long_range_interaction.tag_bedpe
pybedtools.contrib.long_range_interaction.cis_trans_interactions
pybedtools-0.9.0/docs/source/autodocs/ 0000775 0000000 0000000 00000000000 14173526301 0017757 5 ustar 00root root 0000000 0000000 pybedtools-0.9.0/docs/source/autodocs/pybedtools.contrib.plotting.Track.rst 0000664 0000000 0000000 00000007035 14173526301 0027243 0 ustar 00root root 0000000 0000000 pybedtools.contrib.plotting.Track
=================================
.. currentmodule:: pybedtools.contrib.plotting
.. autoclass:: Track
.. automethod:: __init__
.. rubric:: Methods
.. autosummary::
~Track.__init__
~Track.add_callback
~Track.add_checker
~Track.autoscale
~Track.autoscale_None
~Track.changed
~Track.check_update
~Track.contains
~Track.convert_xunits
~Track.convert_yunits
~Track.draw
~Track.findobj
~Track.format_cursor_data
~Track.get_agg_filter
~Track.get_alpha
~Track.get_animated
~Track.get_array
~Track.get_capstyle
~Track.get_children
~Track.get_clim
~Track.get_clip_box
~Track.get_clip_on
~Track.get_clip_path
~Track.get_cmap
~Track.get_contains
~Track.get_cursor_data
~Track.get_dashes
~Track.get_datalim
~Track.get_edgecolor
~Track.get_edgecolors
~Track.get_facecolor
~Track.get_facecolors
~Track.get_figure
~Track.get_fill
~Track.get_gid
~Track.get_hatch
~Track.get_joinstyle
~Track.get_label
~Track.get_linestyle
~Track.get_linestyles
~Track.get_linewidth
~Track.get_linewidths
~Track.get_offset_position
~Track.get_offset_transform
~Track.get_offsets
~Track.get_path_effects
~Track.get_paths
~Track.get_picker
~Track.get_pickradius
~Track.get_rasterized
~Track.get_sizes
~Track.get_sketch_params
~Track.get_snap
~Track.get_transform
~Track.get_transformed_clip_path_and_affine
~Track.get_transforms
~Track.get_url
~Track.get_urls
~Track.get_visible
~Track.get_window_extent
~Track.get_xlims
~Track.get_zorder
~Track.have_units
~Track.hitlist
~Track.is_figure_set
~Track.is_transform_set
~Track.pchanged
~Track.pick
~Track.pickable
~Track.properties
~Track.remove
~Track.remove_callback
~Track.set
~Track.set_agg_filter
~Track.set_alpha
~Track.set_animated
~Track.set_antialiased
~Track.set_antialiaseds
~Track.set_array
~Track.set_capstyle
~Track.set_clim
~Track.set_clip_box
~Track.set_clip_on
~Track.set_clip_path
~Track.set_cmap
~Track.set_color
~Track.set_contains
~Track.set_dashes
~Track.set_edgecolor
~Track.set_edgecolors
~Track.set_facecolor
~Track.set_facecolors
~Track.set_figure
~Track.set_gid
~Track.set_hatch
~Track.set_joinstyle
~Track.set_label
~Track.set_linestyle
~Track.set_linestyles
~Track.set_linewidth
~Track.set_linewidths
~Track.set_lw
~Track.set_norm
~Track.set_offset_position
~Track.set_offsets
~Track.set_path_effects
~Track.set_paths
~Track.set_picker
~Track.set_pickradius
~Track.set_rasterized
~Track.set_sizes
~Track.set_sketch_params
~Track.set_snap
~Track.set_transform
~Track.set_url
~Track.set_urls
~Track.set_verts
~Track.set_verts_and_codes
~Track.set_visible
~Track.set_zorder
~Track.to_rgba
~Track.update
~Track.update_from
~Track.update_scalarmappable
.. rubric:: Attributes
.. autosummary::
~Track.aname
~Track.axes
~Track.midpoint
~Track.mouseover
~Track.stale
~Track.sticky_edges
~Track.zorder
pybedtools-0.9.0/docs/source/changes.rst 0000664 0000000 0000000 00000076632 14173526301 0020316 0 ustar 00root root 0000000 0000000 .. include:: includeme.rst
Changelog
=========
Changes in v0.9
---------------
2022-01-23
The biggest change is that pybedtools is now under the MIT license, following
the lead of BEDTools itself.
Bugfixes:
* Bugfix: `Interval` objects representing VCF lines now have their `start`
attribute correctly zero-based, as indicated in the docs and consistent with
other 1-based formats. See `#355 `_.
* Bugfix: Manually creating `Interval` objects using the `otherfields` argument
now correctly converts to C++ strings. See `#348
`_.
* Bugfix: Workaround for `BedTool.intersect` which in some versions of BEDTools
requires a specific order of arguments. Fixes `#345
`_ and also is a better way
of addressing `#81 `_.
Code cleanup:
* Removed some remnants of Python 2.7 support (thanks @DavidCain)
* Updates to setup.py classifiers to better reflect state of code (thanks @DavidCain)
* Sorted filenames in setup.py to improve reproducibility of build (thanks @lamby)
* Tests converted to run on GitHub Actions (see `#339
`_).
Changes in v0.8.2
-----------------
2021-03-13: Minor updates
* Removed scripts directory from installed version. These are still available
in the GitHub repo, but were causing import issues with Python 3.9 and were
not well-used in the first place.
* Bugfix: unicode is better handled in gzipped files (thanks @naumenko-sa, see
`#320 `_)
* Bugfix: correctly ignore warnings even with capital letters (thanks
@JureZmrzlikar, see `#326 `_)
* Bugfix/improvements: update tests and code to work with Python 3.8 (see `#324
`_). Also addresses `#322
`_.
* Improvement: updates tests to work with bedtools v2.30
* Improvement: integration of `genomepy
`_, which if installed will
help retrieve chromsizes files for less commonly used assemblies (thanks
@simonvh, see `#323 `_)
Changes in v0.8.1
-----------------
2019-12-27: This version has minor updates and bugfixes:
* Bugfix: Fixes to `pbt_plotting_example.py` (thanks Steffen Möllera @smoe)
* Bugfix: Using `BedTool.saveas()` when a BedTool already points to a compressed file
correctly saves (`#308 `_)
* Improvement: Deprecate `pandas.read_table` (thanks André F. Rendeiro
@afrendeiro)
* Improvement: overhaul testing on travis-ci
* Improvement: BedTool objects support either strings or Path objects (`#287
`_, thanks @drchsl)
* Improvement: MySQL host can be configured (`#301
`_, thanks André F. Rendeiro
@afrendeiro)
* Improvement: Better version string parsing (`#289
`_, thanks Steffen Möllera
@smoe), fixes `#275 `_ and others.
* Improvement: Proper CRAM support: `#307 `_
* Improvement: Raise an exception when the `-b` argument to `BedTool.intersect` has more
than 510 files (`#303 `_)
* `*.h` files now included in the distribution (thanks @blaiseli)
* Improvement: Update tests to work with bedtools v2.29.2
Changes in v0.8.0
-----------------
This version further improves testing, improves the way C++ files are included
in the package, and fixes many long-standing bugs.
* Using pytest framework rather than nose for testing
* Updated `setup.py` to be more robust and to more clearly separate
"cythonization" into .cpp files
* Updated test harness for testing in independent conda environments
* All issue tests go in their own test module
* Included Python 3.7 tests (note that at the time of this writing, pysam is
not yet available on bioconda so that dependency is pip-installed in the
test) (`#254 `_)
* Updated tests to reflect BEDTools 2.27.1 output (`#260
`_`#261
`_)
* Removed the `contrib.classifier` module, which has been unsupported for
a while.
* More informative error messages for UCSC tools if they're missing (`#227
`_)
* BedTool objects that are the result of operations that create files that are
not BED/GTF/GFF/BAM can be more easily converted to pandas.DataFrame with
`disable_auto_names=True` arg to `BedTool.to_dataframe()` (`#258
`_)
* Added aliases to existing methods to match current BEDTools commands, e.g.
the `BedTool.nucleotide_content` method can now also be called using
`BedTool.nuc` which is consistent with the `bedtools nuc` command line name.
* New wrapper for `bedtools split`. The wrapper method is called `splitbed` to
maintain backwards compatibility because `pybedtools.BedTool` objects have
long had a `split` method that splits intervals based on a custom function.
* New wrapper for `bedtools spacing`.
* `BedTool.from_dataframe` handles NaN in dataframes by replacing with `"."`,
and is more explicit about kwargs that are passed to `pandas.DataFrame`
(`#257 `_)
* Raise FileNotFoundError when on Python 3 (thanks Gosuke Shibahara, `#255
`_)
* Relocated BEDTools header and .cpp files to the `pybedtools/include`
directory, so they can more easily be linked to from external packages
(`#253 `_)
* Add test for (`#118 `_)
* `BedTool.tabix_contigs` will list the sequence names indexed by tabix
(`#180 `_)
* `BedTool.tabix_intervals` will return an empty generator if the coordinates
provided are not indexed, unless `check_coordinates=True` in which case the
previous behavior of raising a ValueError is triggered (`#181
`_)
* Bugfix: Avoid "ResourceWarning: unclosed file" in `helpers.isBGZIP` (thanks
Stephen Bush)
* Bugfix: Interval objects created directly no longer have their filetype set
to None (`#217 `_)
* Bugfix: Fixed the ability to set paths and reload module afterwards (`#218
`_, `#220
`_, `#222
`_)
* Bugfix: `BedTool.head()` no longer uses an IntervalIterator (which would
check to make sure lines are valid BED/GTF/GFF/BAM/SAM). Instead, it simply
prints the first lines of the underlying file.
* Bugfix: functions passed to `BedTool.filter` and `BedTool.each` no longer
silently pass ValueErrors (`#231
`_)
* Bugfix: Fixed IndexError in IntervalIterator if there was an empty line (`#233
`_)
* Bugfix: Add additional constraint to SAM file detection to avoid incorrectly
detecting a BED file as SAM (`#246
`_)
* Bugfix: accessing Interval.fields after accessing Interval.attrs no longer
raises ValueError (`#246 `_)
Changes in v0.7.10
------------------
Various bug fixes and some minor feature additions:
* Support for comma-separated lists for `mapBed` (thanks Chuan-Sheng Foo)
* Support many calls to `tabix_intervals` without hitting a "Too many open
files" error (`#190 `_)
* Clarify documentation for `genome_coverage` when used with default
parameters (`#113 `_)
* Ignore stderr from samtools on older zlib versions (`#209 `_, thanks Gert Hulselmans)
* Support fetching all regions from a chromosome (`#201 `_, thanks Matt Stone)
* Add wrapper for `shiftBed` (`#200 `_, thanks Saket Choudhary)
* Fix `truncate_to_chrom` in Python 3 (`#203 `_, thanks Saulius Lukauskas)
* When making bigWigs, use `bedSort` to ensure the sort order matches that expected by UCSC tools (`#178 `_)
* Fix newline handling of `pysam.ctabix.TabixIterator` output (`#196 `_)
Changes in v0.7.9
-----------------
Minor bugfix release:
* add `contrib.bigwig.bigwigtobedgraph` (thanks Justin Fear)
* fix `BedTool.seq()` in Python 3
* fix intron creation (`#182 `_, thanks @mmendez12)
* add `six` as an explicit requirement (`#184 `_, thanks @jrdemasi)
* improvements to setup (``_)
* make pandas fully optional
Changes in v0.7.8
-----------------
* Be more careful about BAM vs bgzipped files (#168)
* `BedTool.bgzip` now preserves the header when sorting
* In Python 3, parsed BEDTools help string is decoded properly
* Ensure integer number of processes in Python 3 (thanks Illa Shamovsky)
* Add details on IOError messages for broken pipe error
* Make converting to pandas.DataFrames easier with non-standard BED files (thanks Panos Firmpas)
Changes in v0.7.7
-----------------
* Chromsizes for dm6 and mm10 assemblies added to `genome_registry`
* Better Python 3 compatibility in the `long_range_interaction` module
* New `featurefuncs.UniqueID` class, useful for ensuring all features in a file
have a unique ID in their name field.
* Fix error message when a specified genome file doesn't exist (thanks Saket Choudhary)
Changes in v0.7.6
-----------------
* New module `pybedtools.contrib.long_range_interaction` for working with
HiC-like data.
Changes in v0.7.5
-----------------
* When using tabix-indexed files, `tabix` and `bgzip` are no longer required to
be installed separately. Only `pysam` is needed.
* Recent BEDTools releases support multiple files for the `-b` argument of
`bedtools intersect`. This version of `pybedtools` now supports multiple
files as well. Note that it is still possible to provide a list of strings
representing intervals as the `b` argument to `BedTool.intersect`. To
differentiate between a list of intervals and a list of filenames, the first
item is converted into an `Interval` object; if it fails then consider the items
to be filenames; otherwise assume strings representing intervals. This check
only occurs if the `b` argument is a list or tuple; other iterable types are
always assumed to be intervals.
Changes in v0.7.4
-----------------
Bugfix release.
- fix `#147 `_ so that warnings
are simply passed to the user without raising exceptions
- in setup.py, allow dependencies to have "loose" versions with suffixes like
"rc1"
- fix in `BedTool.cat()` on empty files (thanks Brad Chapman, `PR #149
`_)
Changes in v0.7.1
-----------------
This is largely a bugfix release with the following changes:
- fix for some BAM headers (thanks Gabriel Pratt)
- unified IntervalIterator to address some streaming issues (fixes #143)
- fix bug where `__add__` was not re-raising exceptions (thanks Brad Chapman
and Dan Halligan)
Changes in v0.7.0
-----------------
This release reflects a major upgrade in the underlying code in order to
support both Python 2 and Python 3 using the same code. Aside from trivial
things like converting print statements to functions and using `next()` instead
of `.next()`, this required a substantial rewrite to support the way strings
are handled in Python 3 (in Cython and wrapped C++) and how relative modules
work.
Importantly, after converting them to Python 2- and 3-compatible syntax *all
previous tests pass* so the end user should not notice any differences
except those noted below.
Strings from Interval fields are unicode
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
For consistency between Python 2 and 3, all strings from Interval objects are
now unicode. That is, in Python 2, previously we would get this::
>>> a = pybedtools.example_bedtool('a.bed')
>>> a[0].name
'feature1'
Now, we get this::
>>> a = pybedtools.example_bedtool('a.bed')
>>> a[0].name
u'feature1'
samtools no longer a dependency
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The dependency for samtools has been removed, which simplifies the installation
process. Instead, `pysam` is used for handling BAM files.
In order for existing tests to pass, `pysam.AlignedSegment` objects are
currently converted to `pybedtools.Interval` objects when iterating over a BAM
file. This will come at a performance cost if you are iterating over all reads
in a BAM file using the `pybedtools.BAM` object.
In the future, iterating over a BAM file will yield `pysam.AlignedSegment`
objects directly, but for now you can use the `pybedtools.BAM.pysam_bamfile`
attribute to access the underlying `pysam.AlignmentFile`.
Cython no longer a dependency
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The Cythonized ``.cxx`` files are now shipped with the `pybedtools`
distribution, so Cython is no longer a requirement for installation.
You will however need to have Cython installed if you're developing pybedtools.
Remote BAM support clarification
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Previously, `pybedtools` was able to support remote BAMs by loosely wrapping
samtools, but BAM files still needed to be fully downloaded to disk before
using with BEDTools. This was done automatically, but through an inefficient
mechanism.
Pysam does support remote BAMs, and as before, a BAM file needs to be created
on disk for use with BEDTools. But now this needs to be explicitly done by the
user, which should result in better performance.
Iterating over intervals
~~~~~~~~~~~~~~~~~~~~~~~~
Previously, when iterating over a `BedTool` object, different machinery would
be invoked depending on whether the BedTool was pointing to a file (a
cbedtools.IntervalFile would be invoked), to another iterator of Interval
objects, or to a stream like from the stdout of a BEDTools call
(cbedtools.IntervalIterator in both cases).
Everything is now an IntervalIterator, simplifying the path towards
performance optimization.
gzip support
~~~~~~~~~~~~
Thanks to Saulius Lukauskas, gzip handling is now improved, and calling
`BedTool.saveas()` with a `.gz` extension on the filename will automatically
compress the output.
Docker
~~~~~~
In the github repo there is a `docker` directory containing Dockerfiles to set
up isolated testing environments. These Dockerfiles also demonstrate how to set
up a complete environment starting from a base Ubuntu install.
Tests
~~~~~
All tests from v0.6.9 (which was Python 2 only) have been made Python 2/3
compatible and all previous tests pass.
If you have docker installed, from the top level directory, you can run the
full tests like this::
cd docker
./full-tests.sh
This will build docker containers for Python 2 and Python 3 with all
dependencies, export the parent directory to the container, and run the test
suite.
Conda packages
~~~~~~~~~~~~~~
You can now install the latest versions of tabix, bedtools, pysam, and
pybedtools from conda, dramatically speeding up installation time. These
mechanisms are used for automated testing as well (see the ``condatest.sh``
script in the github repo).
To use these packages in your own environment(s), specify the `daler` conda
channel like this::
conda install -c daler pybedtools
Note that this will not install BEDTools or tabix unless you explicitly say
so::
conda install -c daler pybedtools bedtools tabix
.. note::
This currently only works on Linux; contributions to Mac conda recipes (see
the `conda` dir in the github repo) would be welcomed.
Changes in v0.6.9
-----------------
Minor bug fix release.
* improved the automatic field name handling when converting an interval file to
a `pandas DataFrame`.
* fixed a bug in `IntervalFile` methods `all_hits`, `any_hits` and `count_hits`
where zero-length features were being counted multiple times (thanks Brent
Pedersen and Kyle Smith)
* bgzip and tabix paths can now be configured separately (thanks Rob Beagrie)
* fixed a bug where streaming BAM files were read fully into memory (thanks
Alexey Sergushichev)
Changes in v0.6.8
-----------------
Bugfix: Thanks to Gabriel Pratt, `pybedtools` is no longer plagued by open filehandles
in the C code causing the notorious "Too many files open" error.
Changes in v0.6.7
-----------------
Now compatible with BEDTools v2.21.0.
The one exception is that the new `bedtools intersect` functionality that
allows multiple `-b` files is not yet implemented in `pybedtools`.
New features:
* `BedTool.fisher()` wraps the new BEDTools `fisher` tool. The result is
an object containing parsed results.
* `BedTool.colormap_normalize()` accepts a `percentile` argument, useful when
applying colormaps to data with a handful of extreme outliers
* `BedTool.to_dataframe()` converts a `BedTool` object into a `pandas.DataFrame`
with columns named after the appropriate fields for the filetype (thanks
Radhouane Aniba for the suggestion)
* `BedTool.tail()` to complement `BedTool.head()` (thanks Radhouane Aniba for
the suggestion)
* Add hg38 and hg38.default chromsizes
Minor bug fixes:
* Ensure tuple-like args to `parallel_apply` (fixes #109)
* Temp fix for BEDTools v2.20.0 which required the `-w` arg to come before the
`-s` arg in `bedtools makewindows` (#81)
* Better (i.e., UCSC Genome Browser-compliant) defaults for `featurefuncs.extend_fields`.
* Fix for BedTool.all_hits() and any_hits() which will now show hits for
zero-length features intersecting with other zero-length features with the
same coordinates.
Changes in v0.6.6
-----------------
This is a compatibility release, updated for BEDTools v2.20.0.
There is one API change that affects the behavior of overloaded operators (that
is, using `+` and `-` with BedTool objects) when one of the BedTool objects
represents an empty file.
Assume `a` is a BedTool object representing a regular BED file but `b` is
empty. Previously:
* a + b = a
* b + a = b
* a - b = a
* b - a = a
* b - b = b
* a + a = a
The following changes have been made (indicated in **bold**), which hopefully
make more logical sense:
* **a + b = b**
* b + a = b
* a - b = a
* **b - a = b**
* b - b = b
* a + a = a
Changes in v0.6.5
-----------------
This is a minor bug-fix release:
* Fix for BedTool.all_hits() and any_hits() which will now show hits for
zero-length features with the same coordinates, like the rest of BEDTools.
* Improved error-handling to avoid Python interpreter crashing in cases when
a BED file on the filesystem becomes unavailable after a BedTool object has
been created for it.
Changes in v0.6.4
-----------------
* Full integration with BEDTools v2.18. This includes some compatibility fixes
for the new buffered output capabilities of BEDTools `intersect` and wrapping
the new `bedtools sample` tool.
* Overloaded operators (`+` and `-`) allow empty files as input, even using
BEDTools v2.18+.
* Travis-CI builds now use BEDTools v2.18+ for tests.
* Fix for :func:`pybedtools.featurefuncs.midpoint` (thanks ny-shao)
* Fix to :meth:`BedTool.randomstats` (thanks Michael Reschen)
Changes in v0.6.3
-----------------
* New :mod:`pybedtools.parallel` module for working with many operations in
parallel. See the docs for :func:`pybedtools.parallel.parallel_apply` for
details.
* :func:`pybedtools.contrib.bigbed.bigbed` for converting to bigBed format,
along with auto-SQL creation as needed.
* New function :func:`pybedtools.contrib.bigbed.bigbed_to_bed`, so now bigBed
-> BED and BED -> bigBed interconversions are trivial.
* Support for remote BAMs by passing `remote=True` when creating
a :class:`BedTool` object
* New method :meth:`BedTool.at` for subsetting a BedTool by a set of (sorted)
indexes.
* New functions :func:`featurefuncs.gff2bed` and :func:`featurefuncs.bed2gff`
for use with :meth:`BedTool.each`, for easy converting GFF/GTF to BED
* New function :func:`add_color` for applying matplotlib colormaps to BED
files; see also new method :meth:`pybedtools.BedTool.colormap_normalize`.
* :class:`pybedtools.plotting.BinaryHeatmap` class for working with results
from :meth:`BedTool.multi_intersect`.
* :meth:`BedTool.each` now also has some filter capabilities (if provided
function's return value evaluates to False, feature will be skipped)
* Better detection for samtools (thanks Luca Beltrame)
* Expand BEDToolsError (thanks Ryan Layer)
* Creating a BedTool from a list of intervals now saves to temp file instead of treating
like a consume-once iterator (#73)
* Various fixes to keyword arg handling to match semantics of BEDTools.
* Command line help and improved docs for the `peak_pie.py` script.
* Fix to GFF attributes (thanks Libor Mořkovský)
* Fix to labels in :mod:`pybedtools.contrib.venn_maker.py` (thanks Luca
Pinello)
* Make the naive scaling (to million mapped reads) in
:func:`pybedtools.contrib.bigwig.bam_to_bigwig` optional.
* Fix for :meth:`BedTool.cat` to handle cases where at least one input is an
empty file
* Removed SciPy dependency
* Every commit is built with Travis-CI for continuous integration testing of
changes to source code.
Changes in v0.6.2
-----------------
* Wrapped new tools available in BEDTools 2.17: :meth:`BedTool.jaccard` and
:meth:`BedTool.reldist` wrap the new `bedtools jaccard` and `bedtools
reldist` respectively.
* Initial implementations of building blocks for computing statistics,
:meth:`BedTool.absolute_distance` and :meth:`BedTool.relative_distance`
* :func:`pybedtools.featurefuncs.three_prime`,
:func:`pybedtools.featurefuncs.five_prime`, and
:func:`pybedtools.featurefuncs.TSS` modifier functions that can be passed to
:meth:`BedTool.each`
* :func:`pybedtools.contrib.plotting.binary_heatmap` for visualizing results
from :meth:`BedTool.multi_intersect`
* Fixed a long-standing issue where streaming :class:`BedTool` objects did not
close their open file handles (stdout). When working with many (i.e. tens
of thousands) files, this caused the operating system to hit its open file
limit. This is now fixed.
* :meth:`BedTool.random_op`, a new mechanism for implementing operations that
you would like to apply over tens of thousands of shuffled interval files.
This makes it easy to extend the existing :mod:`pybedtools` multiprocessing
functionality.
* :func:`pybedtools.contrib.bigwig.bam_to_bigwig`, a helper function to create
a library-size-scaled bigWig file from an input BAM file.
* :class:`pybedtools.contrib.plotting.TrackCollection` class, which handles
plotting multiple files at once, using a provided "stylesheet" configuration
to tweak colors etc.
* :class:`pybedtools.contrib.plotting.BedToolsDemo` and
:class:`pybedtools.contrib.plotting.ConfiguredBedToolsDemo`, useful for
running many graphical demos of BEDTools operations using the same
"stylesheet" configuration. Run :file:`pybedtools/contrib/plotting.py` for
a demo.
* chromsizes dictionaries for common assemblies now have a `default` attribute,
which is an OrderedDict of a default set of chromosomes. For example,
``pybedtools.chromsizes('hg19').default`` contains only the entries for the
autosomes and X and Y.
* :meth:`BedTool.cat` now works better with multiprocessing
* added `include_distribution` kwarg to :meth:`BedTool.randomstats`, which will
attach the full distribution of all the randomized files to the results
dictionary.
* New method implementing Jaccard statistic (with pvalue using randomizations):
:meth:`BedTool.random_jaccard`
* :func:`featurefuncs.extend_fields` helper function to pad fields with `'.'`,
useful for manipulating features with the :meth:`BedTool.each` method
* Fixed a bug where BAM files, when written to disk via :meth:`BedTool.saveas`,
were saved as SAM files.
* Better GTF/GFF detection, and if the input had quoted attribute values, then
the output will, too
* various minor bug fixes and improvements as documented in the github commit
logs....
Changes in v0.6.1
-----------------
* New :class:`pybedtools.contrib.plotting.Track` class allows plotting of
features with matplotlib. The `Track` class subclasses
`matplotlib.collections.PolyCollection`, making it rather fast for 1000s of
features.
* See the `scripts/pbt_plotting_example.py` script for a way of visually showing
the results of BEDTools operations . . . great for teaching BEDTools to new
users.
* New :meth:`BedTool.liftover` method (needs a chain file from UCSC and the
`liftover` program installed)
* :class:`BedTool` creation using tuples/lists of values -- everything is
converted to string before creating an :class:`Interval` object.
* bugfix: :meth:`BedTool.window_maker` now handles the `genome` kwarg correctly
* bugfix: `pybedtools.cleanup(remove_all=True)` now works correctly when using
the default temp dir
Changes in v0.6
---------------
* Overhaul in online documentation to hopefully make functionality easier to
find and/or discover. See :ref:`pybedtools reference` for summary tables of
the different parts of :mod:`pybedtools`; each entry is linked to further
class/method/function-specific docs. These more detailed docs also have
links to view the source code from within the HTML docs for more exploration.
* :func:`pybedtools.contrib.venn_maker` function that acts as an interface to
the VennDiagram R package -- just give it some BED files and it'll do the
rest.
* Debug mode -- :func:`pybedtools.debug_mode` -- for verbose logging messages.
* Fixed an open file leak (OSError: too many open files) that occurred when
opening thousands of streaming bed files in a single session.
* Initial support for tabix files. Useful for extracting features from
a single region when you don't need a full intersection.
* New :mod:`pybedtools.contrib` module (in the spirit of Django's `contrib`)
where higher-level functionality will be built.
* :class:`pybedtools.contrib.Classifier` class for identifying the classes of
intervals. Useful for making pie charts of intronic/exonic/intergenic etc
classes of peaks. Note that this is somewhat redundant with the new `mapBed`
program in BEDTools.
* Experimental :class:`pybedtools.contrib.IntersectionMatrix` class for
handling pairwise intersections of a large number of interval files --
including a local sqlite3 database to avoid re-computing already up-to-date
results.
* :class:`Interval` objects are now hashable (it's just a hash of the string
representation) so that you can use them as dictionary keys.
* :meth:`BedTool.split` method, which accepts a function returning an iterable
of :class:`Interval` objects. The function is applied to each interval.
Useful for, say, splitting each gene into TSS, TTS, upstream and downstream
features.
* :meth:`BedTool.truncate_to_chrom` method, which truncates features to the
chromosome sizes of the provided genome. Useful for when you try uploading
a MACS-generated track to the UCSC genome browser, but it complains because
peak boundaries have been extended outside chromosome boundaries . . . this
method fixes the problem.
* :class:`BedTool` objects now have full functionality of :class:`IntervalFile`
objects -- that is, they have the methods :meth:`BedTool.any_hits`,
:meth:`BedTool.all_hits`, and :meth:`BedTool.count_hits` for doing
single-interval tests. Sometimes this will be faster than using the tabix
support, sometimes it won't -- it's best to try both, depending on your data.
* String representations of :class:`Interval` objects now have a newline at the
end, just like raw lines from a BED/GFF/VCF file. Previously, this was
inconsistent and sometimes led to extra blank lines in "streaming"
:class:`BedTool` instances . . . which in turn led to problems with BEDTools
programs using the chromsweep algorithm.
* Concatenate multiple files with one call to :meth:`BedTool.cat` (thanks Jake
Biesinger)
* Wrapped previous BEDTools programs:
* `unionBedGraphs` (:meth:`BedTool.union_bedgraphs`)
* `pairToBed` (:meth:`BedTool.pair_to_bed`)
* `pairToPair` (:meth:`BedTool.pair_to_pair`)
* `bedpeToBam` (:meth:`BedTool.bedpe_to_bam`)
* Wrapped new BEDTools programs:
* `mapBed` (:meth:`BedTool.map`)
* `clusterBed` (:meth:`BedTool.cluster`)
* `randomBed` (:meth:`BedTool.random`)
* `multiIntersectBed` (:meth:`BedTool.multi_intersect`)
* `expandCols` (:meth:`BedTool.expand`)
* `windowMaker` (:meth:`BedTool.window_maker`)
* `bamToFastq` (:meth:`BedTool.bam_to_fastq`)
* Made venn_gchart and venn_mpl tests more stable
* Automatic documenting of which args are passed implicitly for BedTool method
calls
* More robust mechanisms for specifying custom paths for BEDTools installation
as well as optional tabix, samtools, and R installations. This makes it
easier to explicitly specify which versions of the tools to use.
* Improvements to GFF attributes: handle unescaped "=" (from sim4db GFFs) and
make Attribute class properly dict-like (thanks Libor Mořkovský)
Changes in v0.5.5
-----------------
* Use `additional_args` kwarg to pass arguments verbatim to the underlying
BEDTools programs. This is necessary for arguments like
`genomeCoverageBed`'s `-5` argument, since `5=True` is not a valid Python
expression. For example, you can use::
import pybedtools
a = pybedtools.example_bedtool('a.bed')
a.genome_coverage(bg=True, strand='+', genome='hg19', additional_args='-5')
* Brent Pedersen added support for just 2 BED files in the Venn diagram scripts
* :meth:`BedTool.all_hits` uses the underlying BEDTools C++ API to get all hits
in a file for a particular Interval::
a = pybedtools.example_bedtool('a.bed')
interval = Interval('chr1', 1, 5000)
a.all_hits(interval)
* New semantics for comparisons of Interval objects. Visual documentation of
this coming soon.
* More tests for latest BEDTools code
* Interval instances are now pickleable; they can now be used across processes
for parallel code.
Changes in v0.5
---------------
* support for running random intersections in parallel. See
:meth:`BedTool.randomstats` and :meth:`BedTool.randomintersection` (thanks,
Jake Biesinger)
* Cython `Interval.__copy__()` for compatibility with `copy` module
* `seek()` and `rewind()` methods for `IntervalFile` class, used for Aaron
Quinlan's new chromsweep algorithm (https://github.com/arq5x/chrom_sweep)
(thanks, Aaron)
* support and tests for new BEDTools programs `multiBamCov`, `tagBam`, and `nucBed`
* `output="out.bed"` kwarg for all wrapped methods for explicitly specifying
where to save output -- no more moving tempfiles
* docs improvements:
* direct comparison with a shell script to illustrate benefit of
`pybedtools`; see :ref:`shell_comparison`
* more installation details
* 0- and 1-based coordinates discussed early on (the 3 brief examples page,
:ref:`3examples`)
* development history and open collaboration model (see :ref:`devmodel`)
pybedtools-0.9.0/docs/source/conf.py 0000664 0000000 0000000 00000016756 14173526301 0017454 0 ustar 00root root 0000000 0000000 # -*- coding: utf-8 -*-
#
# pybedtools documentation build configuration file, created by
# sphinx-quickstart on Wed Dec 22 17:39:12 2010.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys, os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath('../..'))
from pybedtools import __version__ as version
# -- General configuration -----------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
# Sphinx extensions to enable, listed as module-name strings. Entries may be
# extensions bundled with Sphinx ('sphinx.ext.*') or custom ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.autosummary',
    'sphinx.ext.doctest',
    'sphinx.ext.todo',
    'sphinx.ext.coverage',
    'sphinx.ext.viewcode',
    'numpydoc',
]
doctest_test_doctest_blocks = 'default'
# From http://stackoverflow.com/questions/12206334/\
# sphinx-autosummary-toctree-contains-refere\
# nce-to-nonexisting-document-warnings
numpydoc_show_class_members = False
# this is needed to get the autodoc_source.rst doctests to run
doctest_global_setup = """
from pybedtools import *
import pybedtools
"""
autosummary_generate = True
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The encoding of source files.
#source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = 'pybedtools'
copyright = '2010-2015, Ryan Dale'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = version
# The full version, including alpha/beta/rc tags.
release = version
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = []
# The reST default role (used for this markup: `text`) to use for all documents.
default_role = 'file'
# If true, '()' will be appended to :func: etc. cross-reference text.
add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
#pygments_style = 'sphinx'
highlight_language = 'python'

# Keep quotes and dashes exactly as written in the sources.
# `html_use_smartypants` is the older option name; Sphinx 1.6 deprecated it in
# favour of `smartquotes`. Both are set so the setting takes effect whichever
# name the installed Sphinx recognises (unrecognised names are ignored).
html_use_smartypants = False
smartquotes = False
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# -- Options for HTML output ---------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#html_theme = 'nature'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = ['_themes']
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = ''
# Output file base name for HTML help builder.
htmlhelp_basename = 'pybedtoolsdoc'
# -- Options for LaTeX output --------------------------------------------------
# The paper size ('letter' or 'a4').
#latex_paper_size = 'letter'
# The font size ('10pt', '11pt' or '12pt').
#latex_font_size = '10pt'
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'pybedtools.tex', 'pybedtools Documentation',
'Ryan Dale', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, make top-level headings parts rather than chapters.
# `latex_use_parts` is the older boolean option; newer Sphinx releases replace
# it with `latex_toplevel_sectioning`. Both are set so the same layout is
# requested whichever name the installed Sphinx recognises.
latex_use_parts = True
latex_toplevel_sectioning = 'part'
# If true, show page references after internal links.
#latex_show_pagerefs = False
# If true, show URL addresses after external links.
#latex_show_urls = False
# Additional stuff for the LaTeX preamble.
#latex_preamble = ''
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_domain_indices = True
# -- Options for manual page output --------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'pybedtools', 'pybedtools Documentation',
['Ryan Dale'], 1)
]
pybedtools-0.9.0/docs/source/create-a-bedtool-tutorial.rst 0000664 0000000 0000000 00000003337 14173526301 0023646 0 ustar 00root root 0000000 0000000 .. include:: includeme.rst
Create a :class:`BedTool`
-------------------------
First, follow the :ref:`installation` instructions if you haven't already
done so to install both BEDTools_ and :mod:`pybedtools`.
Then import the :mod:`pybedtools` module and make a new :class:`BedTool`. A
:class:`BedTool` object encapsulates all of the available BEDTools programs and
makes them easier to use within Python. Most of the time when working with
:mod:`pybedtools` you'll be using :class:`BedTool` objects. In general, a
single :class:`BedTool` object points to an interval file (BED, GFF, GTF, VCF,
SAM, or BAM format).
::
>>> import pybedtools
>>> # use a BED file that ships with pybedtools...
>>> a = pybedtools.example_bedtool('a.bed')
>>> # ...or use your own by passing a filename
>>> a = pybedtools.BedTool('peaks.bed')
This documentation uses example files that ship with :mod:`pybedtools`. To
access these files from their installation location, we use the
:func:`example_bedtool` function. This is convenient because if you copy-paste
the examples, they will work. When using the :func:`example_bedtool` function,
the resulting :class:`BedTool` object will point to the corresponding file in
the `test/data` directory of your :mod:`pybedtools` installation. If you would
rather learn using your own files, just pass the filename to a new
:class:`BedTool`, like the above example.
You can use any file that BEDTools_ supports -- this includes BED, VCF,
GFF, and gzipped versions of any of these. See :ref:`Creating a BedTool`
for more on the different ways of creating a :class:`BedTool`, including
from iterators and directly from a string.
Now, let's see how to do a common task performed on BED files: intersections.
pybedtools-0.9.0/docs/source/default-arguments.rst 0000664 0000000 0000000 00000006240 14173526301 0022321 0 ustar 00root root 0000000 0000000 .. currentmodule:: pybedtools
Default arguments
=================
Recall in the earlier :ref:`intersections` section that we passed the `u=True` argument to :meth:`a.intersect`:
.. doctest::
>>> import pybedtools
>>> a = pybedtools.example_bedtool('a.bed')
>>> b = pybedtools.example_bedtool('b.bed')
>>> a_with_b = a.intersect(b, u=True)
Let's do the same thing but use different variable names for the :class:`BedTool` objects so that
the next section is less confusing:
.. doctest::
>>> import pybedtools
>>> exons = pybedtools.example_bedtool('a.bed')
>>> snps = pybedtools.example_bedtool('b.bed')
>>> exons_with_snps = exons.intersect(snps, u=True)
While we're on the subject of arguments, note that we didn't have to specify
`-a` or `-b` arguments, like you would need if calling `intersectBed` from the
command line. In other words, since `exons` refers to the file `a.bed` and
`snps` refers to the file `b.bed`, the following line::
>>> exons_with_snps = exons.intersect(snps, u=True)
is equivalent to the command line usage of::
$ intersectBed -a a.bed -b b.bed -u > tmpfile
But we didn't have to explicitly pass the argument for `-a` because
:class:`BedTool` objects make some assumptions for convenience.
We're calling a method on the :class:`BedTool` object `exons`, so
:mod:`pybedtools` assumes that the file `exons` points to (stored in the
attribute `exons.fn`) is the one we want to use as input. So by default, we
don't need to explicitly give the keyword argument `a=exons.fn` because the
:meth:`exons.intersect` method does so automatically.
We're also calling a method that takes a second bed file as input -- other
such methods include :meth:`BedTool.subtract` and :meth:`BedTool.closest`,
and others. For these methods, in addition to assuming `-a` is taken care
of by the :attr:`BedTool.fn` attribute, :mod:`pybedtools` also assumes the
first unnamed argument to these methods is the second file you want to
operate on (and if you pass a :class:`BedTool`, it'll automatically use the
file in the `fn` attribute of that :class:`BedTool`).
An example may help to illustrate: these different ways of calling
:meth:`BedTool.intersect` all have the same results, with the first version
being the most compact (and probably most convenient):
.. doctest::
>>> # these all have identical results
>>> x1 = exons.intersect(snps)
>>> x2 = exons.intersect(a=exons.fn, b=snps.fn)
>>> x3 = exons.intersect(b=snps.fn)
>>> x4 = exons.intersect(snps, a=exons.fn)
>>> x1 == x2 == x3 == x4
True
Note that `a.intersect(a=a.fn, b)` is not a valid Python expression, since
non-keyword arguments must come before keyword arguments, but
`a.intersect(b, a=a.fn)` works fine.
If you're ever unsure, the docstring for these methods indicates which, if
any, arguments are used as default. For example, in the
:meth:`BedTool.intersect` help, it says::
For convenience, the file or stream this BedTool points to is implicitly
passed as the -a argument to intersectBed
OK, enough about arguments for now, but you can read more about them in
:ref:`similarity principle`, :ref:`default args principle` and :ref:`non
defaults principle`.
pybedtools-0.9.0/docs/source/each.rst 0000664 0000000 0000000 00000004540 14173526301 0017573 0 ustar 00root root 0000000 0000000 .. include:: includeme.rst
Each
====
Similar to :meth:`BedTool.filter`, which applies a function to return True
or False given an :class:`Interval`, the :meth:`BedTool.each` method applies a
function to return a new, possibly modified :class:`Interval`.
The :meth:`BedTool.each` method applies a function to every feature. Like
:meth:`BedTool.filter`, you can use your own function or some pre-defined
ones in the :mod:`featurefuncs` module. Also like :meth:`filter`, `*args`
and `**kwargs` are sent to the function.
.. doctest::
:options: +NORMALIZE_WHITESPACE
>>> a = pybedtools.example_bedtool('a.bed')
>>> b = pybedtools.example_bedtool('b.bed')
>>> # The results of an "intersect" with c=True will return features
>>> # with an additional field representing the counts.
>>> with_counts = a.intersect(b, c=True)
Let's define a function that will take the number of counts in each feature
as calculated above and divide by the number of bases in that feature. We
can also supply an optional scalar, like 0.001, to get the results in
"number of intersections per kb". We then insert that value into the score
field of the feature. Here's the function:
.. doctest::
>>> def normalize_count(feature, scalar=0.001):
... """
... assume feature's last field is the count
... """
... counts = float(feature[-1])
... normalized = round(counts / (len(feature) * scalar), 2)
...
... # need to convert back to string to insert into feature
... feature.score = str(normalized)
... return feature
And we apply it like this:
.. doctest::
:options: +NORMALIZE_WHITESPACE
>>> normalized = with_counts.each(normalize_count)
>>> print(normalized)
chr1 1 100 feature1 0.0 + 0
chr1 100 200 feature2 10.0 + 1
chr1 150 500 feature3 2.86 - 1
chr1 900 950 feature4 20.0 + 1
Similar to :meth:`BedTool.filter`, we could have used the Python built-in
function `map` to map a function to each :class:`Interval`. In fact, this can
still be useful if you don't want a :class:`BedTool` object as a result. For
example::
>>> feature_lengths = map(len, a)
However, the :meth:`BedTool.each` method returns a :class:`BedTool` object,
which can be used in a chain of commands, e.g., ::
>>> a.intersect(b).each(normalize_count).filter(lambda x: float(x[4]) < 1e-5)
pybedtools-0.9.0/docs/source/example-script 0000664 0000000 0000000 00000006241 14173526301 0021021 0 ustar 00root root 0000000 0000000
import pybedtools
# Create a BedTool for the GFF file of annotations
g = pybedtools.BedTool('example.gff')
# Set up two functions that will filter and then rename features to set up for
# merging
def renamer(x):
    """
    Name a GFF feature after its feature type.

    *x* is an Interval object representing a GFF feature.  Its feature type
    (field index 2) becomes its name, which is what .merge() will combine
    in a later step.  The same, modified Interval is returned.
    """
    # Interval fields can be read by index and written by attribute
    featuretype = x[2]
    x.name = featuretype
    return x
def filter_func(x):
    """
    Pass only features of type "intron" or "exon".

    *x* is an Interval object representing a GFF feature.  Returns True when
    its feature type (field index 2) is one of those two, False otherwise.
    """
    return x[2] in ('intron', 'exon')
# Each BedTool method returns a new BedTool, so the calls can be chained:
# filter the GFF features with filter_func, rename them with renamer, and
# save a copy as 'edited.gff' for later inspection.
g2 = g.filter(filter_func).each(renamer).saveas('edited.gff')
# Here we call mergeBed, which operates on the file pointed to by g2
# (that is, 'edited.gff').
#
# We use several options for BEDTools mergeBed:
#
# `nms` combines names of merged features (after filtering and renaming, this
# is either "intron" or "exon") into a semicolon-delimited list;
#
# d=-1 does not merge bookended features together;
#
# s=False leaves mergeBed's strandedness option off.
# NOTE(review): this comment previously read "s=True ensures a stranded
# merge", which disagrees with the call below; the stranded intersect that
# follows also suggests a strand column is expected -- confirm which value
# is intended;
#
# scores='sum' ensures a valid BED file result, with a score field before the
# strand field
#
merged = g2.merge(nms=True, d=-1, s=False, scores='sum')

# Next, we intersect a BAM file with the merged features. Here, we explicitly
# specify the `abam` and `b` arguments, ensure stranded intersections
# (s=True), use BED-format output (bed=True), and report the entire a and b
# features in the output (wao=True):
#
reads_in_features = merged.intersect(abam='example.bam',
                                     b=merged.fn,
                                     s=True,
                                     bed=True,
                                     wao=True)
# Set up a dictionary to hold counts, keyed by feature class
from collections import defaultdict
results = defaultdict(int)

# Iterate through the intersected reads, parse out the names of the features
# they intersected, and increment counts in the dictionary. This illustrates
# how BedTool objects follow the iterator protocol, each time yielding an
# Interval object:
#
total = 0.0
for intersected_read in reads_in_features:
    total += 1

    # Extract the name of the feature this read intersected by indexing into
    # the Interval (the loop variable, not an undefined `feature`)
    intersected_feature = intersected_read[-4]

    # Convert names like "intron;intron;intron", which indicates overlapping
    # isoforms or genes all with introns in this region, to the simple class
    # of "intron". Sorting makes the key independent of the order in which
    # the names were merged.
    key = ';'.join(sorted(set(intersected_feature.split(';'))))

    # Increment the count for this class
    results[key] += 1
# Rename the "." key to something more meaningful. A default of 0 is used so
# that this does not raise KeyError when no read was counted under ".".
results['intergenic'] = results.pop('.', 0)

# Add the total to the dictionary
results['total'] = int(total)

# Function-call form of print works on both Python 2 and Python 3
print(results)

# Delete any temporary files created
pybedtools.cleanup()
pybedtools-0.9.0/docs/source/example-script-nocomments 0000664 0000000 0000000 00000001657 14173526301 0023207 0 ustar 00root root 0000000 0000000 import pybedtools
g = pybedtools.BedTool('example.gff')
def renamer(x):
    featuretype = x[2]
    x.name = featuretype
    return x
def filter_func(x):
    return x[2] in ('intron', 'exon')
g2 = g.filter(filter_func).each(renamer).saveas('edited.gff')
merged = g2.merge(nms=True, d=-1, s=False, scores='sum')
reads_in_features = merged.intersect(
    abam='example.bam',
    b=merged.fn,
    s=True,
    bed=True,
    wao=True,
)
from collections import defaultdict
results = defaultdict(int)
total = 0.0
for intersected_read in reads_in_features:
    total += 1
    intersected_feature = intersected_read[-4]
    key = ';'.join(sorted(set(intersected_feature.split(';'))))
    results[key] += 1
results['intergenic'] = results.pop('.', 0)
results['total'] = int(total)
print(results)
pybedtools.cleanup()
pybedtools-0.9.0/docs/source/example_3 0000777 0000000 0000000 00000000000 14173526301 0030324 2../../pybedtools/scripts/intron_exon_reads.py ustar 00root root 0000000 0000000 pybedtools-0.9.0/docs/source/example_3_no_comments 0000664 0000000 0000000 00000002520 14173526301 0022336 0 ustar 00root root 0000000 0000000 import sys
import multiprocessing
import pybedtools
gff = pybedtools.example_filename('gdc.gff')
bam = pybedtools.example_filename('gdc.bam')
g = pybedtools.BedTool(gff).remove_invalid().saveas()
def featuretype_filter(feature, featuretype):
    """
    Return True if the feature's type (field index 2) equals `featuretype`
    """
    return feature[2] == featuretype
def subset_featuretypes(featuretype):
    """
    Return a BedTool containing only the features of `g` whose type is
    `featuretype`.

    The filtered features are written out with saveas() and a fresh BedTool
    is built from the saved filename.
    NOTE(review): presumably a filename-backed BedTool is returned so the
    result can be passed back from a multiprocessing worker -- confirm.
    """
    result = g.filter(featuretype_filter, featuretype).saveas()
    return pybedtools.BedTool(result.fn)
def count_reads_in_features(features_fn):
    """
    Callback function: intersect the BAM file with `features_fn` as a stream
    and return the count of the result
    """
    reads = pybedtools.BedTool(bam)
    overlapping = reads.intersect(b=features_fn, stream=True)
    return overlapping.count()
if __name__ == '__main__':
    pool = multiprocessing.Pool()
    try:
        featuretypes = ('intron', 'exon')
        introns, exons = pool.map(subset_featuretypes, featuretypes)
        exon_only = exons.subtract(introns).merge().remove_invalid().saveas().fn
        intron_only = introns.subtract(exons).merge().remove_invalid().saveas().fn
        intron_and_exon = exons.intersect(introns).merge().remove_invalid().saveas().fn
        features = (exon_only, intron_only, intron_and_exon)
        results = pool.map(count_reads_in_features, features)
    finally:
        pool.close()
        pool.join()
    labels = (' exon only:',
              ' intron only:',
              'intron and exon:')
    for label, reads in zip(labels, results):
        sys.stdout.write('%s %s\n' % (label, reads))
pybedtools-0.9.0/docs/source/filtering.rst 0000664 0000000 0000000 00000005501 14173526301 0020654 0 ustar 00root root 0000000 0000000
.. include:: includeme.rst
.. _filtering:
Filtering
~~~~~~~~~
The :meth:`BedTool.filter` method lets you pass in a function that accepts an
:class:`Interval` as its first argument and returns True or False. This
allows you to perform "grep"-like operations on :class:`BedTool` objects. For
example, here's how to get a new :class:`BedTool` containing features from `a`
that are more than 100 bp long:
.. doctest::
:options: +NORMALIZE_WHITESPACE
>>> a = pybedtools.example_bedtool('a.bed')
>>> b = a.filter(lambda x: len(x) > 100)
>>> print(b)
chr1 150 500 feature3 0 -
The :meth:`filter` method will pass its `*args` and `**kwargs` to the function
provided. So here is a more generic case, where the function is defined once
and different arguments are passed in for filtering on different lengths:
.. doctest::
:options: +NORMALIZE_WHITESPACE
>>> def len_filter(feature, L):
... "Returns True if feature is longer than L"
... return len(feature) > L
Now we can pass different lengths without defining a new function for each
length of interest, like this:
.. doctest::
:options: +NORMALIZE_WHITESPACE
>>> a = pybedtools.example_bedtool('a.bed')
>>> print(a.filter(len_filter, L=10))
chr1 1 100 feature1 0 +
chr1 100 200 feature2 0 +
chr1 150 500 feature3 0 -
chr1 900 950 feature4 0 +
>>> print(a.filter(len_filter, L=99))
chr1 100 200 feature2 0 +
chr1 150 500 feature3 0 -
>>> print(a.filter(len_filter, L=200))
chr1 150 500 feature3 0 -
See :ref:`BedTools as iterators` for more advanced and space-efficient usage
of :meth:`filter` using iterators.
Note that we could have used the built-in Python function, `filter()`, but that
would have returned an iterator that we would have to construct a new
:class:`pybedtools.BedTool` out of. The :meth:`BedTool.filter` method returns
a ready-to-use :class:`BedTool` object, which allows embedding of
:meth:`BedTool.filter` calls in a chain of commands, e.g.::
>>> a.intersect(b).filter(lambda x: len(x) < 100).merge()
Fast filtering functions in Cython
----------------------------------
The :mod:`featurefuncs` module contains some ready-made functions written
in Cython that will be faster than pure Python equivalents. For example,
there are :func:`greater_than` and :func:`less_than` functions, which are
about 70% faster. In IPython::
>>> from pybedtools.featurefuncs import greater_than
>>> len(a)
310456
>>> def L(x,width=100):
...     return len(x) > width
>>> # The %timeit command is from IPython, and won't work
>>> # in a regular Python script:
>>> %timeit a.filter(greater_than, 100)
1 loops, best of 3: 1.74 s per loop
>>> %timeit a.filter(L, 100)
1 loops, best of 3: 2.96 s per loop
pybedtools-0.9.0/docs/source/flow-of-commands.rst 0000664 0000000 0000000 00000011225 14173526301 0022041 0 ustar 00root root 0000000 0000000 Under the hood
==============
This section documents some details about what happens when a :class:`BedTool`
object is created and exactly what happens when a BEDTools command is called.
It's mostly useful for developers or for debugging.
There are three kinds of sources/sinks for BedTool objects:
* filename
* open file object
* iterator of Interval objects
Iterator "protocol"
-------------------
BedTool objects yield an Interval object on each `next()` call. Where this
Interval comes from depends on how the BedTool was created and what format the
underlying data are in, as follows.
Filename-based
~~~~~~~~~~~~~~
If BED/GTF/GFF/VCF format, then use an `IntervalFile` object for Cython/C++
speed.
If SAM format, then use an `IntervalIterator`. This is a Cython object that
reads individual lines and passes them to `create_interval_from_list`, a Cython
function. `create_interval_from_list` does a lot of the work to figure out
what format the line is, and this is how we are able to support SAM Interval
objects.
If BAM format, then first do a Popen call to `samtools view`, and create an
`IntervalIterator` from subprocess.PIPE similar to SAM format.
Open file-based
~~~~~~~~~~~~~~~
All formats are passed to an `IntervalIterator`, which reads one line at
a time and yields an `Interval` object.
If it's a BAM file (specifically, a detected bgzip stream), then it's actually
first sent to the stdin of a `samtools` Popen call, and then the
subprocess.PIPE from that Popen's stdout is sent to an `IntervalIterator`.
Iterator or generator-based
~~~~~~~~~~~~~~~~~~~~~~~~~~~
If it's neither of the above, then the assumption is that it's already an
iterable of `Interval` objects. This is the case if a `BedTool` is created
with something like::
a = pybedtools.example_bedtool('a.bed')
b = pybedtools.BedTool((i for i in a))
In this case, the `(i for i in a)` creates a generator of intervals from an
`IntervalFile` -- since `a` is a filename-based BedTool. Since the first
argument to the BedTool constructor is neither a filename nor an open file, the
new BedTool `b`'s `.fn` attribute is directly set to this generator . . . so we
have a generator-based BedTool.
Calling BEDTools programs
-------------------------
Depending on the type of BedTool (filename, open file, or iterator), the method
of calling BEDTools programs differs.
In all cases, BEDTools commands are called via a `subprocess.Popen` call
(hereafter called "the Popen" for convenience). Depending on the type of
BedTool objects being operated on, the Popen will be passed different objects
as stdin and/or stdout.
In general, using a filename as input is the most straightforward -- nothing is
passed to the Popen's stdin because the filenames are embedded in the BEDTools
command.
Using non-filename-based BedTools means that they are passed, one line at
a time, to the stdin of the Popen. The commands for the BEDTools call
will specify "stdin" in these cases, as is standard for the BEDTools suite.
The default is for the output to be file-based. In this case, an open tempfile
object is provided as the Popen's stdout.
If the returned BedTool is requested to be a "streaming" BedTool, then the
Popen's stdout will be subprocess.PIPE, and the new BedTool object will be
open-file based (which is what subprocess.PIPE acts like).
Specifically, here is the information flow of stdin/stdout for various
interconversions of BedTool types . . . .
:filename -> filename:
The calling BedTool is filename-based and `stream=False`.
* `stdin`: `None` (the filenames are provided in the BEDTools command)
* `stdout`: open tempfile object
* new BedTool: filename-based BedTool pointing to the tempfile's filename
:filename -> open file object:
The calling BedTool is filename-based and `stream=True` is requested.
* `stdin`: None (provided in the cmds)
* `stdout`: open file object -- specifically, subprocess.PIPE
* new BedTool: iterator-based BedTool. Each `next()` call retrieves the
next line in subprocess.PIPE
:open file object -> filename:
The calling BedTool is from, e.g., subprocess.PIPE and there's
a saveas() call to "render" to file.
* `stdin`: each line in the open file object is written to subprocess.PIPE
* `stdout`: open file object -- either a tempfile or new file created from
supplied filename
* new BedTool: filename-based BedTool
:open file object -> iterator:
The calling BedTool is usually based on subprocess.PIPE, and the output
will *also* come from subprocess.PIPE.
* `stdin`: each line from the open file is written to subprocess.PIPE
* `stdout`: open file object, subprocess.PIPE
* new BedTool: filename based on subprocess.PIPE
pybedtools-0.9.0/docs/source/history.rst 0000664 0000000 0000000 00000013543 14173526301 0020377 0 ustar 00root root 0000000 0000000 .. include:: includeme.rst
.. _`working with history`:
Using the history and tags
--------------------------
`BEDTools`_ makes it very easy to do rather complex genomic algebra. Sometimes
when you're doing some exploratory work, you'd like to rewind back to a
previous step, or clean up temporary files that have been left on disk over the
course of some experimentation.
To assist this sort of workflow, :class:`BedTool` instances keep track of
their history in the :attr:`BedTool.history` attribute. Let's make an
example :class:`BedTool`, `c`, that has some history:
.. doctest::
:options: +NORMALIZE_WHITESPACE
>>> a = pybedtools.example_bedtool('a.bed')
>>> b = pybedtools.example_bedtool('b.bed')
>>> c = a.intersect(b, u=True)
`c` now has a history which tells you all sorts of useful things (described
in more detail below)::
>>> print(c.history)
[ bedtool("/home/ryan/pybedtools/pybedtools/test/a.bed").intersect("/home/ryan/pybedtools/pybedtools/test/b.bed", u=True), parent tag: klkreuay, result tag: egzgnrvj]
There are several things to note here. First, the history describes the full
commands, including all the names of the temp files and all the arguments that
you would need to run in order to re-create it. Since :class:`BedTool` objects
are fundamentally file-based, the command refers to the underlying filenames
(i.e., :file:`a.bed` and :file:`b.bed`) instead of the :class:`BedTool`
instances (i.e., `a` and `b`). A simple copy-paste of the command will be
enough to re-run the command. While this may be useful in some situations, be
aware that if you do run the command again you'll get *another* temp file that
has the same contents as `c`'s temp file.
To avoid such cluttering of your temp dir, the history also reports
**tags**. :class:`BedTool` objects, when created, get a random tag assigned
to them. You can get the :class:`BedTool` associated with a tag with the
:func:`pybedtools.find_tagged` function. These tags are used to keep track
of instances during this session.
So in this case, we could get a reference to the `a` instance with::
>>> should_be_a = pybedtools.find_tagged('klkreuay')
Here's confirmation that the parent of the first step of `c`'s history is
`a` (note that :class:`HistoryStep` objects have a
:attr:`HistoryStep.parent_tag` and :attr:`HistoryStep.result_tag`):
.. doctest::
>>> pybedtools.find_tagged(c.history[0].parent_tag) == a
True
Let's make something with a more complicated history:
.. doctest::
>>> a = pybedtools.example_bedtool('a.bed')
>>> b = pybedtools.example_bedtool('b.bed')
>>> c = a.intersect(b)
>>> d = c.slop(g=pybedtools.chromsizes('hg19'), b=1)
>>> e = d.merge()
>>> # this step adds complexity!
>>> f = e.subtract(b)
Let's see what the history of `f` (the last :class:`BedTool` created) looks
like . . . note that here I'm formatting the results to make it easier to
see::
>>> print(f.history)
[
| [
| | [
| | | [
| | | | BedTool("/usr/local/lib/python2.6/dist-packages/pybedtools/test/data/a.bed").intersect(
| | | | "/usr/local/lib/python2.6/dist-packages/pybedtools/test/data/b.bed",
| | | | ),
| | | | parent tag: rzrztxlw,
| | | | result tag: ifbsanqk
| | | ],
| | |
| | | BedTool("/tmp/pybedtools.BgULVj.tmp").slop(
| | | b=1,genome="hg19"
| | | ),
| | | parent tag: ifbsanqk,
| | | result tag: omfrkwjp
| | ],
| |
| | BedTool("/tmp/pybedtools.SFmbYc.tmp").merge(),
| | parent tag: omfrkwjp,
| | result tag: zlwqblvk
| ],
|
| BedTool("/tmp/pybedtools.wlBiMo.tmp").subtract(
| "/usr/local/lib/python2.6/dist-packages/pybedtools/test/data/b.bed",
| ),
| parent tag: zlwqblvk,
| result tag: reztxhen
]
Those first three history steps correspond to `c`, `d`, and `e`
respectively, as we can see by comparing the code snippet above with the
commands in each history step. In other words, `e` can be described by the
sequence of 3 commands in the first three history steps. In fact, if we
checked `e.history`, we'd see exactly those same 3 steps.
When `f` was created above, it operated both on `e`, which had its own
history, as well as `b` -- note the nesting of the list. You can do
arbitrarily complex "genome algebra" operations, and the history of the
:class:`BedTool` will keep track of this. It may not be useful in every
situation, but the ability to backtrack and have a record of what you've
done can sometimes be helpful.
Deleting temp files specific to a single :class:`BedTool`
---------------------------------------------------------
You can delete temp files that have been created over the history of a
:class:`BedTool` with :meth:`BedTool.delete_temporary_history`. This method
will inspect the history, figure out which items point to files in the temp dir
(which you can see with :func:`get_tempdir`), and prompt you for their
deletion::
>>> f.delete_temporary_history()
Delete these files?
/tmp/pybedtools.BgULVj.tmp
/tmp/pybedtools.SFmbYc.tmp
/tmp/pybedtools.wlBiMo.tmp
(y/N) y
Note that the file that `f` points to is left alone. To clarify, the
:meth:`BedTool.delete_temporary_history` will only delete temp files that match
the pattern ``/pybedtools.*.tmp`` from the history of `f`, up to but
not including the file for `f` itself. Any :class:`BedTool` instances that do
not match the pattern are left alone. Use the kwarg `ask=False` to disable
the prompt.
pybedtools-0.9.0/docs/source/images/ 0000775 0000000 0000000 00000000000 14173526301 0017403 5 ustar 00root root 0000000 0000000 pybedtools-0.9.0/docs/source/images/downloads.png 0000664 0000000 0000000 00000004573 14173526301 0022114 0 ustar 00root root 0000000 0000000 ‰PNG
IHDR — 4 u=n sRGB ®Îé pHYs šœ tIMEÛ ±íO tEXtComment Created with GIMPW èIDATxÚíœ{PT×Ç÷¹÷î²°]–‡"+‚D¾™¨´#Öd:阩B5ö:
Zm§™Ìäd¦¦qÛÌtĦµ“8NÆÑÔh¢¬`"ŠBTÀEV@`ìÂÞ½súÇ"2ެn]*íœÏ_»çžûûÝ=ßû;¿ó¸w)Y–ð?Mš€¨H *ˆŠ¢"Q‘0Í`Ãn·{ddcLZê¹À0Œ(Šf³Y§Ó…©FM6_ôz½n·Ûd2éõzҚϑÑÑQÇ“˜˜Y,*Šâv»V+M“.÷9£×ëAèëëÓétÇE].—Éd"N—ÁM›L&—ËY,úý~“ÉDÒáôAÅ¡¡!‹ÅA,jšFnºFÒg’ù"ašÏIR$±H *ˆŠDEÝD,uzo³4¡ˆ3¦Í[Zº~Yf* (j*]›_©ªzÅÂFäå1Í¥ôœø`ßO좻6¦óStÍÓ>õqñ¦¸X‘e¸ûÊ—û÷oý/õO¬ Foõ‘býT¯e²Ñ2tàÀ¿ß_YY‰Ú¿¿Ãáx¤BFFÆöíÛišÞ»w/ TVV†µ'½ùÎæL K÷êÿ±÷p‹§áhíÒ_¯{%S ©S±
hš¦(ª¢¢bÏž=WoÊˈ¡(Êét†j"„ÂÛ”eYÓ4ˆÏ_¿öBËç÷\M—Ö¬·‚š·ý—_×·Þõ* KÈ(\½~]Q27pvïOÝ]¼c×Òù@Ç'ïW_²s+Þ®˜«Wz¾Ø½¯Ö¿²²jÕÈgïU_S3Ê6Ú®Ÿm¼åRØøÌ¥².ÇHá‡Ý#Æ«žÇx ºëŽ;ÛâA ”Þj_±áÕå³
4 _{Í‘ÏkoÉœyîÒ=z`
û/Ÿ8zº©Ó-€`Î*.}mÝü6JÙÓÑGÄø‰!½^¿uëV–»E8ŽÛ¶m›Á`xD¶ð–&”Т--