Source code for scprep.io.mtx

from .. import utils
from .utils import _matrix_to_data_frame
from scipy import sparse

import os
import pandas as pd
import scipy.io as sio


def load_mtx(mtx_file, cell_axis="row", gene_names=None, cell_names=None, sparse=None):
    """Read a Matrix Market (.mtx) file and attach optional gene / cell names.

    Parameters
    ----------
    mtx_file : str
        The name of the mtx file to be loaded
    cell_axis : {'row', 'column'}, optional (default: 'row')
        Orientation of the stored matrix. Use ``cell_axis='column'`` when the
        file has genes on the rows and cells on the columns; the matrix is
        then transposed after reading. ``'col'`` is accepted as an alias
        for ``'column'``.
    gene_names : `str`, array-like, or `None` (default: None)
        A filename or an array containing a list of gene symbols or ids
    cell_names : `str`, array-like, or `None` (default: None)
        A filename or an array containing a list of cell barcodes
    sparse : bool, optional (default: None)
        If True, loads the data as a pd.DataFrame[pd.SparseArray]. This uses
        less memory but more CPU. The value is forwarded unchanged to
        ``_matrix_to_data_frame``.

    Returns
    -------
    data : array-like, shape=[n_samples, n_features]
        If either gene or cell names are given, data will be a pd.DataFrame or
        pd.DataFrame[pd.SparseArray]. If no names are given, data will
        be a np.ndarray or scipy.sparse.spmatrix

    Raises
    ------
    ValueError
        If `cell_axis` is not one of 'row', 'column' or 'col'.
    """
    column_aliases = ("column", "col")
    # Validate before touching the file so a bad argument fails fast.
    if cell_axis not in ("row",) + column_aliases:
        raise ValueError(
            "cell_axis {} not recognized. Expected 'row' or 'column'".format(cell_axis)
        )

    # Read in mtx file
    matrix = sio.mmread(mtx_file)
    if cell_axis in column_aliases:
        # Cells were stored on columns: flip so that rows are cells.
        matrix = matrix.T

    return _matrix_to_data_frame(
        matrix, gene_names=gene_names, cell_names=cell_names, sparse=sparse
    )
def _write_names(path, names):
    """Write one name per line to the text file at `path`."""
    with open(path, "w") as handle:
        handle.writelines("{}\n".format(name) for name in names)


def save_mtx(data, destination, cell_names=None, gene_names=None):
    """Save a mtx file.

    Parameters
    ----------
    data : array-like, shape=[n_samples, n_features]
        Input data, saved to destination/matrix.mtx
    destination : str
        Directory in which to save the data. A leading ``~`` is expanded and
        the directory, including any missing parent directories, is created
        if it does not already exist.
    cell_names : list-like, shape=[n_samples], optional (default: None)
        Cell names associated with rows, saved to destination/cell_names.tsv.
        If `data` is a pandas DataFrame and `cell_names` is None,
        these are autopopulated from `data.index`.
    gene_names : list-like, shape=[n_features], optional (default: None)
        Gene names associated with columns, saved to
        destination/gene_names.tsv. If `data` is a pandas DataFrame and
        `gene_names` is None, these are autopopulated from `data.columns`.

    Examples
    --------
    >>> import scprep
    >>> scprep.io.save_mtx(data, destination="my_data")
    >>> reload = scprep.io.load_mtx("my_data/matrix.mtx",
    ...                             cell_names="my_data/cell_names.tsv",
    ...                             gene_names="my_data/gene_names.tsv")
    """
    if isinstance(data, pd.DataFrame):
        # Pull labels off the frame before it is converted to a bare matrix.
        if cell_names is None:
            cell_names = data.index
        if gene_names is None:
            gene_names = data.columns
    data = utils.to_array_or_spmatrix(data)
    # The matrix is written in COO form.
    data = sparse.coo_matrix(data)

    # handle ~/ and relative paths
    destination = os.path.expanduser(destination)
    # makedirs creates missing parents and, with exist_ok, avoids the
    # check-then-create race of isdir() followed by mkdir().
    os.makedirs(destination, exist_ok=True)

    if cell_names is not None:
        _write_names(os.path.join(destination, "cell_names.tsv"), cell_names)
    if gene_names is not None:
        _write_names(os.path.join(destination, "gene_names.tsv"), gene_names)
    sio.mmwrite(os.path.join(destination, "matrix.mtx"), data)