# Source code for fgread.readers

import anndata
import numpy as np
import pandas as pd
import scipy.sparse as sp
import scanpy as sc
from pathlib import Path
from . import DOCSURL


def read_loom_to_anndata(ds_file: Path) -> "anndata.AnnData":
    """Read a dataset stored in the loom format into an AnnData object.

    Parameters
    ----------
    ds_file
        Path to the loom file; passed unchanged to ``anndata.read_loom``.

    Returns
    -------
    The object returned by ``anndata.read_loom``.
    """
    return anndata.read_loom(ds_file)


def read_seurat_to_anndata(ds_file: Path):
    """Placeholder reader for datasets in the Seurat format (not implemented).

    Parameters
    ----------
    ds_file
        Path to the Seurat file. Currently unused.

    Raises
    ------
    NotImplementedError
        Always; the message points to ``DOCSURL`` for more information.
    """
    message = (
        f"Reading of Seurat files not implemented.\nSee {DOCSURL} for more information."
    )
    raise NotImplementedError(message)


def read_anndata_to_anndata(ds_file: Path) -> "anndata.AnnData":
    """Read a dataset stored in the AnnData (h5ad) format into an AnnData object.

    Parameters
    ----------
    ds_file
        Path to the h5ad file; passed unchanged to ``anndata.read_h5ad``.

    Returns
    -------
    The object returned by ``anndata.read_h5ad``.
    """
    return anndata.read_h5ad(ds_file)


def read_10xhdf5_to_anndata(ds_file: Path) -> "anndata.AnnData":
    """Read a dataset stored in the 10x hdf5 format into an AnnData object.

    Parameters
    ----------
    ds_file
        Path to the 10x hdf5 file; passed unchanged to ``scanpy.read_10x_h5``.

    Returns
    -------
    The object returned by ``scanpy.read_10x_h5``.
    """
    return sc.read_10x_h5(ds_file)


def read_10xmtx_to_anndata(ds_file: Path) -> "anndata.AnnData":
    """Read a dataset stored in the 10x mtx format into an AnnData object.

    Parameters
    ----------
    ds_file
        Path to a file inside the 10x mtx directory. Only its parent
        directory is used: that directory is handed to
        ``scanpy.read_10x_mtx``.

    Returns
    -------
    The object returned by ``scanpy.read_10x_mtx``.
    """
    # scanpy is given the containing directory, not the individual file.
    mtx_dir = ds_file.parent
    return sc.read_10x_mtx(mtx_dir)


def read_densetsv_to_anndata(ds_file: Path) -> "anndata.AnnData":
    """Read a dense, tab-separated text file into an AnnData object.

    Parameters
    ----------
    ds_file
        Path to the tsv file.

    Returns
    -------
    The result of ``read_densemat_to_anndata`` called with a tab separator.
    """
    return read_densemat_to_anndata(ds_file, sep="\t")


def read_densecsv_to_anndata(ds_file: Path) -> "anndata.AnnData":
    """Read a dense, comma-separated text file into an AnnData object.

    Parameters
    ----------
    ds_file
        Path to the csv file.

    Returns
    -------
    The result of ``read_densemat_to_anndata`` called with a comma separator.
    """
    return read_densemat_to_anndata(ds_file, sep=",")


def read_densemat_to_anndata(ds_file: Path, sep=None) -> "anndata.AnnData":
    """Read a dense, delimited text matrix (genes x cells) into AnnData.

    Helper for the tsv and csv readers, which pass the matching separator.
    The first line holds the cell names, optionally preceded by a label for
    the gene column. Every following line holds a gene identifier followed by
    one numeric value per cell.

    Parameters
    ----------
    ds_file
        Path to the text file.
    sep
        Field separator. ``None`` splits on runs of whitespace.

    Returns
    -------
    AnnData whose observations are the cells and whose variables are the
    genes; ``X`` is stored as a float32 CSR matrix.

    Raises
    ------
    ValueError
        If the first data line does not contain at least one value column.
    """
    with open(ds_file) as handle:
        # Drop the line terminator before splitting; otherwise the last cell
        # name would carry a trailing newline when an explicit separator is used.
        header = handle.readline().rstrip("\r\n").replace('"', "").split(sep)
        first_row = handle.readline().rstrip("\r\n").replace('"', "").split(sep)

    # The first data row decides how many cells there are, because the header
    # may or may not contain a label above the gene column.
    n_cells = len(first_row) - 1
    if n_cells < 1:
        raise ValueError(
            f"{ds_file}: expected a gene identifier followed by at least one "
            "value on the first data line."
        )
    cells = header[-n_cells:]

    # Use the same separator as for the numeric part; pandas would otherwise
    # default to a comma and swallow whole tab-separated lines as the gene ID.
    genes = pd.read_csv(
        ds_file,
        sep=r"\s+" if sep is None else sep,
        skiprows=1,
        usecols=[0],
        header=None,
        names=["GeneID"],
    ).set_index("GeneID")

    # ndmin=2 keeps a genes x cells layout even when the file holds a single
    # gene row, so the transpose always yields cells x genes.
    dense = np.loadtxt(
        ds_file,
        delimiter=sep,
        skiprows=1,
        usecols=range(1, len(cells) + 1),
        dtype=np.float32,
        ndmin=2,
    ).T
    X = sp.csr_matrix(dense)

    obs = pd.DataFrame(cells, columns=["sample"], index=pd.Series(cells, name="CellID"))

    return anndata.AnnData(X=X, var=genes, obs=obs)