From c235e5326a82fe03e36cdec956508b4b9dfdfce1 Mon Sep 17 00:00:00 2001 From: Till Hartmann Date: Thu, 22 Apr 2021 11:20:53 +0200 Subject: [PATCH 1/2] add cosmic database download wrapper --- bio/cosmic-db/environment.yaml | 8 ++++ bio/cosmic-db/meta.yaml | 9 +++++ bio/cosmic-db/test/Snakefile | 16 ++++++++ bio/cosmic-db/wrapper.py | 67 ++++++++++++++++++++++++++++++++++ test.py | 8 ++++ 5 files changed, 108 insertions(+) create mode 100644 bio/cosmic-db/environment.yaml create mode 100644 bio/cosmic-db/meta.yaml create mode 100644 bio/cosmic-db/test/Snakefile create mode 100644 bio/cosmic-db/wrapper.py diff --git a/bio/cosmic-db/environment.yaml b/bio/cosmic-db/environment.yaml new file mode 100644 index 00000000000..be6a91a1f3b --- /dev/null +++ b/bio/cosmic-db/environment.yaml @@ -0,0 +1,8 @@ +channels: + - bioconda + - conda-forge + - defaults +dependencies: + - curl =7 + - python >=3.6.2 + - requests =2.25 diff --git a/bio/cosmic-db/meta.yaml b/bio/cosmic-db/meta.yaml new file mode 100644 index 00000000000..809a0fff19a --- /dev/null +++ b/bio/cosmic-db/meta.yaml @@ -0,0 +1,9 @@ +name: cosmic-db +description: | + Download cosmic databases from https://cancer.sanger.ac.uk/cosmic/download +authors: + - Till Hartmann +input: + - genome build, cosmic release version, dataset and file name +output: + - database diff --git a/bio/cosmic-db/test/Snakefile b/bio/cosmic-db/test/Snakefile new file mode 100644 index 00000000000..10fbafd1b6a --- /dev/null +++ b/bio/cosmic-db/test/Snakefile @@ -0,0 +1,16 @@ +envvars: + "COSMIC_EMAIL", + "COSMIC_PW" + +rule cosmic_download: + output: + "resources/{db}" + params: + build="GRCh38", + dataset="cosmic", + version="v92", + file=lambda wc: wc.db # e.g. 
"CosmicHGNC.tsv.gz" + log: + "logs/cosmic-db/{db}.log" + wrapper: + "master/bio/cosmic-db" diff --git a/bio/cosmic-db/wrapper.py b/bio/cosmic-db/wrapper.py new file mode 100644 index 00000000000..999360ec10a --- /dev/null +++ b/bio/cosmic-db/wrapper.py @@ -0,0 +1,67 @@ +"""Snakemake wrapper for downloading COSMIC database files.""" + +__author__ = "Till Hartmann" +__copyright__ = "Copyright 2021, Till Hartmann" +__email__ = "till.hartmann@udo.edu" +__license__ = "MIT" + +import requests +import os +from typing import List +from snakemake.shell import shell + +email = os.environ["COSMIC_EMAIL"] +password = os.environ["COSMIC_PW"] +assert email, "$COSMIC_EMAIL is not set" +assert password, "$COSMIC_PW is not set" + +COSMIC_URL = "https://cancer.sanger.ac.uk/cosmic/file_download" + + +def available_builds() -> List[str]: + builds = requests.get(COSMIC_URL).json() + return builds + + +def available_datasets(build: str) -> List[str]: + datasets = requests.get(f"{COSMIC_URL}/{build}").json() + return [d.rpartition("/")[-1] for d in datasets] + + +def available_versions(build: str, dataset: str) -> List[str]: + versions = requests.get(f"{COSMIC_URL}/{build}/{dataset}").json() + return [v.rpartition("/")[-1] for v in versions] + + +def available_files(build: str, dataset: str, version: str) -> List[str]: + files = requests.get(f"{COSMIC_URL}/{build}/{dataset}/{version}").json() + return [f.rpartition("/")[-1] for f in files] + + +def download_path(build: str, dataset: str, version: str, file: str) -> str: + return f"{COSMIC_URL}/{build}/{dataset}/{version}/{file}" + + +build = snakemake.params.get("build", "") +dataset = snakemake.params.get("dataset", "") +version = snakemake.params.get("version", "") +file = snakemake.params.get("file", "") +log = snakemake.log_fmt_shell(stdout=False, stderr=True) + +builds = available_builds() +assert build in builds, f"{build} is not available. Choose one of: {builds}." 
+ +datasets = available_datasets(build) +assert dataset in datasets, f"{dataset} is not available. Choose one of: {datasets}." + +versions = available_versions(build, dataset) +assert version in versions, f"{version} is not available. Choose one of: {versions}." + +files = available_files(build, dataset, version) +assert file in files, f"{file} is not available. Choose one of: {files}." + +download_url = requests.get( + download_path(build, dataset, version, file), auth=(email, password) +).json()["url"].strip() + +shell('curl "{download_url}" -o {snakemake.output[0]} {log}') diff --git a/test.py b/test.py index 964479541c2..aeb4c3f0913 100644 --- a/test.py +++ b/test.py @@ -125,6 +125,14 @@ def run(wrapper, cmd, check_log=None): os.chdir(origdir) +@skip_if_not_modified +def test_download_cosmic_db(): + run( + "bio/cosmic-db", + ["snakemake", "--cores", "1", "--use-conda", "resources/CosmicHGNC.tsv.gz"], + ) + + @skip_if_not_modified def test_open_cravat_run(): run( From 4818014d61e06d6a73160df497141b6e0f642660 Mon Sep 17 00:00:00 2001 From: Till Hartmann Date: Thu, 22 Apr 2021 11:31:02 +0200 Subject: [PATCH 2/2] black reformat --- bio/cosmic-db/wrapper.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bio/cosmic-db/wrapper.py b/bio/cosmic-db/wrapper.py index 999360ec10a..e769ff551ee 100644 --- a/bio/cosmic-db/wrapper.py +++ b/bio/cosmic-db/wrapper.py @@ -60,8 +60,10 @@ def download_path(build: str, dataset: str, version: str, file: str) -> str: files = available_files(build, dataset, version) assert file in files, f"{file} is not available. Choose one of: {files}." -download_url = requests.get( - download_path(build, dataset, version, file), auth=(email, password) -).json()["url"].strip() +download_url = ( + requests.get(download_path(build, dataset, version, file), auth=(email, password)) + .json()["url"] + .strip() +) shell('curl "{download_url}" -o {snakemake.output[0]} {log}')