Source code for dandelion.tutorial._tutorial

from importlib.resources import path
import subprocess

from pathlib import Path
from urllib.request import Request, urlopen


def _download_file(url: str, dest: Path | str, chunk_size: int = 8192):
    """Download a file using urllib.

    Parameters
    ----------
    url : str
        URL of the file to download.
    dest : Path | str
        Destination file path to write the downloaded content.
    chunk_size : int, optional
        Number of bytes to read per chunk. Defaults to 8192.
    """
    req = Request(
        url, headers={"User-Agent": "Mozilla/5.0 (compatible; Python urllib)"}
    )
    with urlopen(req) as response, open(dest, "wb") as out_file:
        while True:
            chunk = response.read(chunk_size)
            if not chunk:
                break
            out_file.write(chunk)


[docs] def setup_dandelion_tutorial_bcr(path: Path | str | None = None) -> None: """Download example BCR datasets for Dandelion tutorial. Downloads 10x Genomics PBMC BCR datasets into a local directory for use in the dandelion BCR preprocessing tutorial. Parameters ---------- path : Path | str | None, optional Root directory to download datasets into. Defaults to ``./dandelion_tutorial``. """ base = Path("./dandelion_tutorial") if path is None else Path(path) base.mkdir(parents=True, exist_ok=True) datasets = { "vdj_v1_hs_pbmc3_b": { "filtered_feature_bc_matrix.h5": "https://cf.10xgenomics.com/samples/cell-vdj/3.1.0/vdj_v1_hs_pbmc3/vdj_v1_hs_pbmc3_filtered_feature_bc_matrix.h5", "filtered_contig_annotations.csv": "https://cf.10xgenomics.com/samples/cell-vdj/3.1.0/vdj_v1_hs_pbmc3/vdj_v1_hs_pbmc3_b_filtered_contig_annotations.csv", "filtered_contig.fasta": "https://cf.10xgenomics.com/samples/cell-vdj/3.1.0/vdj_v1_hs_pbmc3/vdj_v1_hs_pbmc3_b_filtered_contig.fasta", }, "vdj_nextgem_hs_pbmc3_b": { "filtered_feature_bc_matrix.h5": "https://cf.10xgenomics.com/samples/cell-vdj/3.1.0/vdj_nextgem_hs_pbmc3/vdj_nextgem_hs_pbmc3_filtered_feature_bc_matrix.h5", "filtered_contig_annotations.csv": "https://cf.10xgenomics.com/samples/cell-vdj/3.1.0/vdj_nextgem_hs_pbmc3/vdj_nextgem_hs_pbmc3_b_filtered_contig_annotations.csv", "filtered_contig.fasta": "https://cf.10xgenomics.com/samples/cell-vdj/3.1.0/vdj_nextgem_hs_pbmc3/vdj_nextgem_hs_pbmc3_b_filtered_contig.fasta", }, "sc5p_v2_hs_PBMC_10k_b": { "filtered_feature_bc_matrix.h5": "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v2_hs_PBMC_10k/sc5p_v2_hs_PBMC_10k_filtered_feature_bc_matrix.h5", "filtered_contig_annotations.csv": "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v2_hs_PBMC_10k/sc5p_v2_hs_PBMC_10k_b_filtered_contig_annotations.csv", "filtered_contig.fasta": "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v2_hs_PBMC_10k/sc5p_v2_hs_PBMC_10k_b_filtered_contig.fasta", "airr_rearrangement.tsv": 
"https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v2_hs_PBMC_10k/sc5p_v2_hs_PBMC_10k_b_airr_rearrangement.tsv", }, "sc5p_v2_hs_PBMC_1k_b": { "filtered_feature_bc_matrix.h5": "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v2_hs_PBMC_1k/sc5p_v2_hs_PBMC_1k_filtered_feature_bc_matrix.h5", "filtered_contig_annotations.csv": "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v2_hs_PBMC_1k/sc5p_v2_hs_PBMC_1k_b_filtered_contig_annotations.csv", "filtered_contig.fasta": "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v2_hs_PBMC_1k/sc5p_v2_hs_PBMC_1k_b_filtered_contig.fasta", }, } for dirname, files in datasets.items(): dirpath = base / dirname dirpath.mkdir(parents=True, exist_ok=True) for filename, url in files.items(): outfile = dirpath / filename if outfile.exists(): continue print(f"Downloading {filename}{outfile}") _download_file(url, outfile)
[docs] def setup_dandelion_tutorial_tcr(path: Path | str | None = None) -> None: """Download example TCR datasets for Dandelion tutorial. Downloads 10x Genomics PBMC and melanoma TCR datasets into a local directory for use in the dandelion TCR preprocessing tutorial. Parameters ---------- path : Path | str | None, optional Root directory to download datasets into. Defaults to ``./dandelion_tutorial``. """ base = Path("./dandelion_tutorial") if path is None else Path(path) base.mkdir(parents=True, exist_ok=True) datasets = { "sc5p_v2_hs_PBMC_10k_t": { "filtered_feature_bc_matrix.h5": "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v2_hs_PBMC_10k/sc5p_v2_hs_PBMC_10k_filtered_feature_bc_matrix.h5", "airr_rearrangement.tsv": "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v2_hs_PBMC_10k/sc5p_v2_hs_PBMC_10k_t_airr_rearrangement.tsv", "filtered_contig_annotations.csv": "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v2_hs_PBMC_10k/sc5p_v2_hs_PBMC_10k_t_filtered_contig_annotations.csv", "filtered_contig.fasta": "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v2_hs_PBMC_10k/sc5p_v2_hs_PBMC_10k_t_filtered_contig.fasta", }, "sc5p_v1p1_hs_melanoma_10k_t": { "filtered_feature_bc_matrix.h5": "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v1p1_hs_melanoma_10k/sc5p_v1p1_hs_melanoma_10k_filtered_feature_bc_matrix.h5", "airr_rearrangement.tsv": "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v1p1_hs_melanoma_10k/sc5p_v1p1_hs_melanoma_10k_t_airr_rearrangement.tsv", "filtered_contig_annotations.csv": "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v1p1_hs_melanoma_10k/sc5p_v1p1_hs_melanoma_10k_t_filtered_contig_annotations.csv", "filtered_contig.fasta": "https://cf.10xgenomics.com/samples/cell-vdj/4.0.0/sc5p_v1p1_hs_melanoma_10k/sc5p_v1p1_hs_melanoma_10k_t_filtered_contig.fasta", }, } for dirname, files in datasets.items(): dirpath = base / dirname dirpath.mkdir(parents=True, exist_ok=True) for filename, url in files.items(): 
outfile = dirpath / filename if outfile.exists(): continue print(f"Downloading {filename}{outfile}") _download_file(url, outfile)
[docs] def setup_dandelion_tutorial_trajectory(path: Path | str | None = None) -> None: """Download example datasets for Dandelion V(D)J trajectory tutorial. Downloads panfetal B-cell trajectory GEX and VDJ data from Google Drive using ``gdown``. Parameters ---------- path : Path | str | None, optional Root directory to download datasets into. Defaults to ``./dandelion_tutorial``. Raises ------ ImportError If ``gdown`` is not installed. """ try: import gdown except ImportError: raise ImportError( "gdown is required to download the trajectory tutorial data. Please install it via `pip install gdown`." ) base = Path("./dandelion_tutorial") if path is None else Path(path) base.mkdir(parents=True, exist_ok=True) gex_id = "1-LbAinwhAhJW3Y60wpO9GWJJcaMa_liy" vdj_id = "1lyScJWdGopW2nLoIhZmfUGVSWLWI_qWg" datasets = { "panfetal_trajectory": { "demo-pseudobulk.h5ad": f"https://drive.google.com/uc?id={gex_id}", "demo-vdj-traj.tsv.gz": f"https://drive.google.com/uc?id={vdj_id}", } } for dirname, files in datasets.items(): dirpath = base / dirname dirpath.mkdir(parents=True, exist_ok=True) for filename, url in files.items(): outfile = dirpath / filename if outfile.exists(): continue print(f"Downloading {filename}{outfile}") gdown.download(url, str(outfile), quiet=False)
def setup_dandelion_tutorial_simple(path: Path | str | None = None) -> None: """Download example datasets for Dandelion simple tutorial. Downloads a small demo dataset with GEX and BCR data from Google Drive using ``gdown``. Parameters ---------- path : Path | str | None, optional Root directory to download datasets into. Defaults to ``./dandelion_tutorial``. Raises ------ ImportError If ``gdown`` is not installed. """ try: import gdown except ImportError: raise ImportError( "gdown is required to download the simple tutorial data. Please install it via `pip install gdown`." ) base = Path("./dandelion_tutorial") if path is None else Path(path) base.mkdir(parents=True, exist_ok=True) gex_id = "1-PrwDi1Py8jqioNtP0DISKrcShRRHKxk" vdj_id = "1-d_uah-NzJqDYRP53ICgAAquiVLWRRtN" datasets = { "simple_demo": { "demo-gex.h5ad": f"https://drive.google.com/uc?id={gex_id}", "demo-vdj.h5ddl": f"https://drive.google.com/uc?id={vdj_id}", } } for dirname, files in datasets.items(): dirpath = base / dirname dirpath.mkdir(parents=True, exist_ok=True) for filename, url in files.items(): outfile = dirpath / filename if outfile.exists(): continue print(f"Downloading {filename}{outfile}") gdown.download(url, str(outfile), quiet=False)
[docs] def setup_dandelion_tutorial_parse(path: Path | str | None = None) -> None: """Download the extremely large dataset from Parse Biosciences for Dandelion tutorial. Downloads the Parse Biosciences 1M human BCR dataset (cell metadata CSV and AIRR rearrangement TSV) into a local directory. Parameters ---------- path : Path | str | None, optional Root directory to download datasets into. Defaults to ``./dandelion_tutorial``. """ base = Path("./dandelion_tutorial") if path is None else Path(path) base.mkdir(parents=True, exist_ok=True) datasets = { "human-bcr-1m": { "cell_metadata.csv": "https://cdn.parsebiosciences.com/bcr/human-bcr-1m/cell_metadata.csv", "bcr_annotation_airr.tsv": "https://cdn.parsebiosciences.com/bcr/human-bcr-1m/bcr_annotation_airr.tsv", } } for dirname, files in datasets.items(): dirpath = base / dirname dirpath.mkdir(parents=True, exist_ok=True) for filename, url in files.items(): outfile = dirpath / filename if outfile.exists(): continue print(f"Downloading {filename}{outfile}") _download_file(url, outfile)
def setup_colab_singularity() -> None:  # pragma: no cover
    """Install and configure Apptainer/Singularity in a Google Colab environment.

    Installs ``apptainer-suid`` from the official PPA, replaces
    ``/usr/bin/singularity`` with a small wrapper script that runs
    ``unshare -r apptainer "$@"``, and registers the Sylabs remote.

    Safe to re-run: a pre-existing ``/usr/bin/singularity`` is moved to
    ``/usr/bin/singularity_backup`` only when no backup exists yet, so a
    second run does not replace the backup with the wrapper from the
    previous run.

    Raises
    ------
    subprocess.CalledProcessError
        If any step of the installation script exits with a non-zero status.
    """
    bash_script = r"""
set -e

echo "Installing Apptainer..."
sudo apt update -qq
sudo apt install -y -qq software-properties-common
if ! grep -q apptainer /etc/apt/sources.list /etc/apt/sources.list.d/* 2>/dev/null; then
    sudo add-apt-repository -y ppa:apptainer/ppa
    sudo apt update -qq
fi
sudo apt install -y -qq apptainer-suid

echo "Configuring fakeroot..."
sudo apptainer config fakeroot --add root || true

echo "Creating singularity wrapper..."
# The shebang lets the wrapper be exec'ed directly (e.g. from Python's
# subprocess), not only from a shell that falls back to sh on ENOEXEC.
printf '%s\n' '#!/bin/sh' 'unshare -r apptainer "$@"' | sudo tee /usr/bin/singularity_test > /dev/null
sudo chmod +x /usr/bin/singularity_test
# Keep the first backup: on a re-run /usr/bin/singularity is already the wrapper.
if [ ! -e /usr/bin/singularity_backup ]; then
    sudo mv /usr/bin/singularity /usr/bin/singularity_backup 2>/dev/null || true
fi
sudo mv /usr/bin/singularity_test /usr/bin/singularity

echo "Adding Sylabs remote..."
apptainer remote add --no-login SylabsCloud cloud.sylabs.io || true
apptainer remote use SylabsCloud

echo "Done."
"""
    # check=True surfaces any failing step (the script runs under `set -e`).
    subprocess.run(["bash", "-c", bash_script], check=True)
    print("\n✅ Singularity / Apptainer ready!")