Multisample analysis#
Setup#
import missionbio.mosaic as ms
from IPython.display import display, HTML
# Load a multi-sample file
# By default, a multi-sample h5 file loads as a SampleGroup object
group = ms.load_example_dataset("Multisample PBMC")
# To analyze one sample at a time, access them using the samples attribute
# (index 1 is the second entry of the name listing shown below)
sample = group.samples[1]
# These are the samples in the h5 file
[s.name for s in group.samples]
['Sample 1', 'Sample 2']
Applying functions#
# To apply a function across all samples use the `apply` method on the SampleGroup
# It returns a list of returned objects for each sample
def filt(sample):
    """Return the variants that pass the default DNA filter for one sample.

    Intended to be passed to ``SampleGroup.apply`` so it runs once per sample.

    Args:
        sample: an object exposing ``dna.filter_variants()``.

    Returns:
        Whatever ``sample.dna.filter_variants()`` returns (an array of
        variant ids in the example output).
    """
    return sample.dna.filter_variants()
# Run `filt` on every sample; the result holds one array of variant ids per sample
filtered_variants = group.apply(filt)
filtered_variants
[array(['chr2:198266943:C/T', 'chr2:198267770:G/GAA', 'chr4:55599436:T/C',
'chr5:170837457:A/G', 'chr7:140449071:C/G', 'chr7:148504716:AG/A',
'chr7:148504854:A/AGACTT', 'chr7:148508833:A/G',
'chr7:148543525:A/G', 'chr7:148543583:G/C', 'chr7:148543693:TA/T',
'chr11:32417945:T/C', 'chr11:119148573:G/T', 'chr13:28602292:T/C',
'chr17:7578115:T/C', 'chr17:7579801:G/C', 'chr17:29559932:C/A',
'chr20:31024028:G/A', 'chrX:39933339:A/G', 'chrX:44833841:C/A',
'chrX:133547814:T/C'], dtype='<U23'),
array(['chr2:198266943:C/T', 'chr2:198267770:G/GAA', 'chr4:55599436:T/C',
'chr5:170837457:A/G', 'chr7:140449071:C/G', 'chr7:148504716:AG/A',
'chr7:148504854:A/AGACTT', 'chr7:148508833:A/G',
'chr7:148543525:A/G', 'chr7:148543583:G/C', 'chr7:148543693:TA/T',
'chr11:32417945:T/C', 'chr11:119148573:G/T', 'chr13:28602292:T/C',
'chr17:7578115:T/C', 'chr17:7579801:G/C', 'chr17:29559932:C/A',
'chr20:31024028:G/A', 'chrX:39933339:A/G', 'chrX:44833841:C/A',
'chrX:133547814:T/C'], dtype='<U23')]
# Subset the same variants in all dna assays
# It is important to maintain the same variants across all dna assays
og_num_vars = [s.dna.shape[1] for s in group.samples]

# Union of the filtered variants of every sample. `dict.fromkeys` is used
# rather than a set so the variants keep a reproducible, first-seen order
# from run to run (set iteration order of strings depends on the hash seed).
var_union = list(
    dict.fromkeys(v for variants in filtered_variants for v in variants)
)

for sample in group:
    sample.dna = sample.dna[:, var_union]  # Subsets all samples with the same variants

new_num_vars = [s.dna.shape[1] for s in group.samples]
print(og_num_vars, new_num_vars)  # The old and new number of variants for each sample in the group
[21, 21] [21, 21]
# The functions applied on each sample can be more complex - like this assignment and relabeling method
# Note the original labels can be uncoordinated across samples in the group
# The labels are changed to ensure that each label is for the same clone
variants_of_interest = ['chr7:148508833:A/G', 'chr17:29559932:C/A', 'chr4:55599436:T/C']


def cluster(sample):
    """Cluster one sample by genotype and relabel its clones by signature.

    Each clone label is replaced by the "-"-joined integer NGT signature of
    ``variants_of_interest`` (for example ``"1-1-2"``), so that the same
    label refers to the same genotype in every sample.

    Args:
        sample: an object whose ``dna`` attribute provides
            ``group_by_genotype``, ``signature`` and ``rename_labels``.

    Returns:
        The clone table from ``group_by_genotype`` after
        ``rename(index=label_map)`` has been applied to it.
    """
    clone_table = sample.dna.group_by_genotype(variants_of_interest, max_ado_score=0.8)

    # Rename labels so that each sample has the same labels
    # Here the signature of each variant is used to rename the labels
    signature = sample.dna.signature("NGT").loc[:, variants_of_interest]
    names = signature.apply(lambda vs: "-".join(str(int(v)) for v in vs), axis=1)
    label_map = dict(names.items())

    # Don't rename the outlier categories. `pop` with a default is used
    # because a sample need not contain every outlier category; `del` would
    # raise KeyError for the absent ones.
    for lab in ("missing", "small", "ADO"):
        label_map.pop(lab, None)

    sample.dna.rename_labels(label_map)

    # NOTE(review): the rendered tables still show the numeric clone ids in
    # their column header, so this index rename appears to have no visible
    # effect - confirm whether `columns=label_map` was intended.
    clone_table = clone_table.rename(index=label_map)
    return clone_table  # Return the clone table
# Cluster and relabel every sample; `apply` returns one clone table per sample
tables = group.apply(cluster)

for t in tables:  # The clone tables for each sample
    display(HTML(t.to_html()))
clone | 1 | 4 | 6 | Missing GT clones (4) | Small subclones (12) | ADO clones (4) |
---|---|---|---|---|---|---|
chr7:148508833:A/G | Het (50.2%) | WT (2.14%) | Het (35.02%) | Missing in 0.00% of cells | WT (32.3%) | Het (47.42%) |
chr17:29559932:C/A | Het (49.37%) | Het (51.98%) | Het (49.73%) | Missing in 0.10% of cells | Het (49.89%) | Het (50.23%) |
chr4:55599436:T/C | Hom (99.81%) | Het (53.44%) | Het (73.29%) | Missing in 0.67% of cells | Hom (70.91%) | Hom (99.71%) |
Total Cell Number | 1377 (70.87%) | 87 (4.48%) | 79 (4.07%) | 15 (0.77%) | 56 (2.88%) | 329 (16.93%) |
Sample 1 Cell Number | 1377 (70.87%) | 87 (4.48%) | 79 (4.07%) | 15 (0.77%) | 56 (2.88%) | 329 (16.93%) |
Parents | [6] | NaN | NaN | NaN | NaN | NaN |
Sisters | [small] | NaN | NaN | NaN | NaN | NaN |
ADO score | 0.0 | 0 | 0 | NaN | NaN | NaN |
clone | 1 | 4 | 6 | Missing GT clones (4) | Small subclones (7) | ADO clones (4) |
---|---|---|---|---|---|---|
chr7:148508833:A/G | WT (1.03%) | Het (29.15%) | Het (49.14%) | Missing in 0.00% of cells | WT (20.71%) | WT (0.95%) |
chr17:29559932:C/A | Het (49.45%) | Het (49.2%) | Het (49.25%) | Missing in 0.06% of cells | Het (47.46%) | Het (50.09%) |
chr4:55599436:T/C | Het (50.56%) | Het (70.28%) | Hom (99.42%) | Missing in 0.94% of cells | Hom (67.46%) | Het (47.08%) |
Total Cell Number | 1189 (65.76%) | 79 (4.37%) | 74 (4.09%) | 18 (1.0%) | 66 (3.65%) | 382 (21.13%) |
Sample 2 Cell Number | 1189 (65.76%) | 79 (4.37%) | 74 (4.09%) | 18 (1.0%) | 66 (3.65%) | 382 (21.13%) |
Parents | NaN | NaN | NaN | NaN | NaN | NaN |
Sisters | NaN | NaN | NaN | NaN | NaN | NaN |
ADO score | 0 | 0 | 0 | NaN | NaN | NaN |
Drawing figures#
# Displaying the same plot across all the samples
for sample in group:
    fig = sample.dna.heatmap("NGT_FILTERED", features=variants_of_interest)
    fig.show("jpg")  # One figure is rendered per sample
# Normalize Protein and look for patterns
for sample in group:
    # Normalization is called before plotting so the combined heatmap is
    # drawn after the protein reads have been normalized
    sample.protein.normalize_reads()
    fig = sample.heatmap(("dna", "protein"))
    fig.show("jpg")
# Ignore warnings raised when running clone_vs_analyte
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

variants_of_interest = ['chr17:29559932:C/A', 'chr4:55599436:T/C', 'chr7:148508833:A/G']
proteins_of_interest = ['CD19', "CD34", "CD30"]
# These labels use the signature-based naming assigned by `cluster`
clones_of_interest = ["0-1-1", "1-1-2"]

for sample in group:
    # Work on a subset `s` so the samples in `group` are not reassigned here;
    # presumably `barcodes(...)` returns the cells carrying the given labels
    # - confirm against the Dna.barcodes documentation
    s = sample[sample.dna.barcodes(clones_of_interest)]
    s.dna = s.dna[:, variants_of_interest]
    s.protein = s.protein[:, proteins_of_interest]
    s.clone_vs_analyte("protein")
Multisample plots#
# Draw a fishplot for the dna labels
# From the proportions in the heatmaps - The two clones of interest are 0-1-1 and 1-1-2
# `parents` has one entry per label; presumably None means the clone has no
# parent clone - confirm against the fishplot documentation
fig = group.fishplot(
"dna",
sample_order=["Sample 1", "Sample 2"],
labels=["0-1-1", "1-1-2"],
parents=[None, None]
)
fig.show("jpg")
# Draw a barplot for the dna labels
# `label_order` uses the signature-based clone labels assigned by `cluster`
# NOTE(review): `percentage=True` presumably plots proportions rather than
# raw cell counts - confirm against the barplot documentation
fig = group.barplot(
"dna",
sample_order=["Sample 1", "Sample 2"],
label_order=["0-1-1", "1-1-2"],
percentage=True
)
fig.show("jpg")