Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

updating with sc_pipeline_muon_dev #13

Merged
merged 2 commits into from
Mar 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Available pipelines:

See [installation instructions here](https://github.com/DendrouLab/panpipes/blob/main/docs/install.md)

<!-- Oxford BMRC Rescomp users find additional advice in [docs/installation_rescomp](https://github.com/DendrouLab/panpipes/blob/main/docs/installation_rescomp.md) -->
Oxford BMRC Rescomp users can find additional advice in [docs/installation_rescomp](https://github.com/DendrouLab/panpipes/blob/main/docs/installation_rescomp.md).

# General principles for running pipelines

Expand Down
2 changes: 1 addition & 1 deletion docs/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ conda activate pipeline_env
we include an environment.yml for a conda environment tested on all the pipelines packaged in this version of Panpipes.

##### Step 2 Download and install this repo

If you have not already set up SSH keys for GitHub, first follow these [instructions](https://github.com/DendrouLab/panpipes/blob/main/docs/set_up_ssh_keys_for_github.md):

```
git clone https://github.com/DendrouLab/panpipes
Expand Down
25 changes: 25 additions & 0 deletions docs/set_up_ssh_keys_for_github.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

## Set up SSH key for GitHub
For more advice: https://docs.github.com/en/authentication/connecting-to-github-with-ssh/about-ssh

First check for existing keys (the first command below). If you receive an error that ~/.ssh doesn't exist, then you don't have a key yet. If one already exists (e.g. id_rsa.pub, id_ed25519.pub), you can either connect it to GitHub or generate a new one.
```
ls -al ~/.ssh #check for existing keys
ssh-keygen -t ed25519 -C "your_email@example.com" #use your GitHub email address
#Enter a file in which to save the key (/c/Users/you/.ssh/id_algorithm):[Press enter]
#Enter passphrase (empty for no passphrase): [Type a passphrase]
eval "$(ssh-agent -s)" #start ssh-agent
ssh-add ~/.ssh/id_ed25519 #add your SSH private key to ssh-agent
clip < ~/.ssh/id_ed25519.pub #copy SSH public key
```
After copying your SSH public key, go to GitHub --> Settings --> SSH and GPG keys (under Access) --> Add new public SSH key

To test connection
```
ssh -T git@github.com
```
A successful connection should result in
> Hi username! You've successfully authenticated, but GitHub does not provide shell access.

Activate the environment
```
60 changes: 52 additions & 8 deletions panpipes/funcs/scmethods.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from scipy.sparse import issparse
from scanpy.get import obs_df as get_obs_df
from scanpy.pp import normalize_total
import scanpy as sc
import warnings
import logging
from typing import Optional, Literal
Expand Down Expand Up @@ -38,9 +39,53 @@ def exp_mean_dense(x):
# convert out of compressed sparse matrix
return np.log((np.sum(np.exp(x)-1)/x.shape[1]) + 1)



def pseudo_seurat(adata, arg_minpct=0.1, arg_mindiffpct=-float("inf"), arg_logfcdiff=0.25, use_dense=False):
def find_all_markers_pseudo_seurat(
        adata,
        groups,
        groupby,
        layer=None,
        method=None,
        n_genes=float("inf"),
        corr_method="bonferroni",
        arg_minpct=0.1,
        arg_mindiffpct=-float("inf"),
        arg_logfcdiff=0.25):
    """Find markers for each group with a one-vs-rest, pre-filtered test.

    For every value in ``groups`` the cells are relabelled as ``'1'`` (the
    group) or ``'0'`` (everything else), genes are pre-filtered with
    ``pseudo_seurat`` and ``scanpy.tl.rank_genes_groups`` is then run on the
    remaining genes only.

    Side effects on ``adata``: when ``layer`` is given, ``adata.X`` is
    overwritten with ``adata.layers[layer]``; a column ``adata.obs['idents']``
    is (re)written on every iteration and is left in place on return.

    Parameters
    ----------
    adata
        Annotated data object providing ``.X``, ``.layers`` and ``.obs``.
    groups
        Iterable of group labels to test, or the string ``'all'`` to test
        every unique value of ``adata.obs[groupby]``.
    groupby
        Name of the ``adata.obs`` column holding the group labels.
    layer
        Optional layer name to use as the expression matrix.
    method
        Forwarded to ``scanpy.tl.rank_genes_groups``.
    n_genes
        Forwarded to ``scanpy.tl.rank_genes_groups``.
    corr_method
        Forwarded to ``scanpy.tl.rank_genes_groups``.
    arg_minpct, arg_mindiffpct, arg_logfcdiff
        Filtering thresholds forwarded to ``pseudo_seurat``.

    Returns
    -------
    (markers, filter_stats)
        Two concatenated pandas objects keyed by group label: the
        ``rank_genes_groups`` result tables, and the per-group output of
        ``pseudo_seurat``.
    """
    # Replace X with the requested layer so the filtering step sees it.
    if layer is not None:
        adata.X = adata.layers[layer]
    # pseudo_seurat needs to know whether the matrix is sparse or dense.
    use_dense = not issparse(adata.X)
    # Only compare against 'all' when groups really is a string; comparing an
    # array/Series with == would be element-wise and ambiguous in an `if`.
    if isinstance(groups, str) and groups == 'all':
        groups = adata.obs[groupby].unique().tolist()
    markers_dict = {}
    filter_dict = {}
    for cv in groups:
        # One-vs-rest labels: '1' for the current group, '0' for all others.
        adata.obs['idents'] = ['1' if x == cv else '0' for x in adata.obs[groupby]]
        filter_dict[cv] = pseudo_seurat(
            adata,
            use_dense=use_dense,
            arg_minpct=arg_minpct,
            arg_mindiffpct=arg_mindiffpct,
            arg_logfcdiff=arg_logfcdiff)
        # NOTE(review): 'background' is presumably a boolean per-gene mask
        # returned by pseudo_seurat -- confirm against that function.
        background = filter_dict[cv]['background']
        logging.info("number of genes remaining after filtering: %i\n", background.sum())
        adata_rg = adata[:, background.tolist()].copy()
        # Forward the caller's n_genes / corr_method instead of hard-coding
        # them, so the function's parameters actually take effect.
        sc.tl.rank_genes_groups(
            adata_rg,
            layer=layer,
            groupby="idents",
            groups=["1"],
            reference="0",
            method=method,
            n_genes=n_genes,
            corr_method=corr_method)
        markers_dict[cv] = sc.get.rank_genes_groups_df(adata_rg, group="1")
        # Drop the per-group copy before the next iteration allocates another.
        del adata_rg
    markers = pd.concat(list(markers_dict.values()), keys=list(markers_dict.keys()))
    filter_stats = pd.concat(list(filter_dict.values()), keys=list(filter_dict.keys()))
    return markers, filter_stats

def pseudo_seurat(adata,
arg_minpct=0.1,
arg_mindiffpct=-float("inf"),
arg_logfcdiff=0.25,
use_dense=False):
"""
alternative method that"s more like seurat (pseudo seurat if you will)
In that you filter genes before running rank genes
Expand Down Expand Up @@ -79,7 +124,6 @@ def pseudo_seurat(adata, arg_minpct=0.1, arg_mindiffpct=-float("inf"), arg_logfc
min_pct = pcts.min(axis=1)
diff_pct = max_pct - min_pct
take_diff_pct = diff_pct > arg_mindiffpct

# remove genes that are not expressed higher than 0.1 in one of the groups
take_min_pct = max_pct > arg_minpct

Expand All @@ -88,7 +132,7 @@ def pseudo_seurat(adata, arg_minpct=0.1, arg_mindiffpct=-float("inf"), arg_logfc
# this has the potential to be very slow. Transposeing it speeds it up a bit.
# I need to undertand sparse matrices better to make it work
if use_dense:
print("using dense matrix")
logging.info("using dense matrix")
# extract the counts for cluster cells and calculate exp means on each row
nct = adata.X.T[:, cluster_cells_ind]
cluster_mean = np.apply_along_axis(exp_mean_dense, 1, nct.todense())
Expand All @@ -98,7 +142,7 @@ def pseudo_seurat(adata, arg_minpct=0.1, arg_mindiffpct=-float("inf"), arg_logfc
other_mean = np.apply_along_axis(exp_mean_dense, 1, nct.todense())
diff_mean = abs(cluster_mean - other_mean)
else:
print("using sparse matrix")
logging.info("using sparse matrix")
cluster_mean = exp_mean_sparse(adata.X.T[:, cluster_cells_ind])
other_mean = exp_mean_sparse(adata.X.T[:, other_cells_ind])
diff_mean = abs(cluster_mean - other_mean).A1
Expand All @@ -122,7 +166,7 @@ def run_neighbors_method_choice(adata, method, n_neighbors, n_pcs, metric, use_r
# useful if we are dealing with a MuData object but we want to use single rep, e.g.
# calculating neighbors on a totalVI latent rep
if method == "scanpy":
print("Computing neighbors using scanpy")
logging.info("Computing neighbors using scanpy")
from scanpy.pp import neighbors
neighbors(adata,
n_pcs=n_pcs,
Expand All @@ -131,7 +175,7 @@ def run_neighbors_method_choice(adata, method, n_neighbors, n_pcs, metric, use_r
use_rep=use_rep)
elif method == "hnsw":
from scvelo.pp import neighbors
print("Computing neighbors using hnswlib (with scvelo a la pegasus!)")
logging.info("Computing neighbors using hnswlib (with scvelo a la pegasus!)")
# we use the neighbors function from scvelo (thanks!)
# with parameters from pegasus (for a more exact result).
# code snippet from Steve Sansom, via COMBAT project
Expand Down
Loading