marshall-2022¶
# Connect this environment to the scverse/spatial LaminDB instance via the lamin CLI.
!lamin load scverse/spatial
💡 connected lamindb: scverse/spatial
import lamindb as ln
# Set the uid under which this notebook is tracked, then start tracking.
# Per the cell output, track() loads the notebook's Transform and a Run.
ln.context.uid = "VwUYGOmA3koC0000"
ln.context.track()
💡 connected lamindb: scverse/spatial
💡 notebook imports: anndata==0.10.7 lamindb==0.70.3 numpy==1.26.4 scanpy==1.10.1 vitessce==3.2.5
💡 loaded: Transform(uid='VwUYGOmA3koC65cN', name='marshall-2022', key='2024-04-02-marshall-2022', version='2', type='notebook', updated_at=2024-04-23 12:40:36 UTC, created_by_id=2)
💡 loaded: Run(uid='v6SbqImFKgQZdOBlsUcW', started_at=2024-04-23 13:45:52 UTC, is_consecutive=True, transform_id=20, created_by_id=2)
Download and process data¶
h5ad file downloaded from: https://lamin.ai/laminlabs/cellxgene/artifacts/zuSDjhCBRxYCOm8pY6SL
# Query the laminlabs/cellxgene instance for the source artifact by its uid;
# .one() is expected to return exactly one record.
cellxgene_artifacts = ln.Artifact.using("laminlabs/cellxgene")
h5ad_artifact = cellxgene_artifacts.filter(uid="zuSDjhCBRxYCOm8pY6SL").one()

# stage() yields the value later passed to read_h5ad as the input path
# (presumably a local copy of the file -- confirm against the lamindb docs).
h5ad = h5ad_artifact.stage()
Convert h5ad to zarr.
Script below is from: https://github.com/vitessce/vitessce-python/blob/main/demos/marshall-2022/src/convert_to_zarr.py
from anndata import read_h5ad
import numpy as np
import scanpy as sc
from vitessce.data_utils import (
to_diamond,
to_uint8,
optimize_adata,
)
def convert_h5ad_to_zarr(input_path, output_path, *, radius=10):
    """Preprocess an h5ad file and write it out as a Zarr store for Vitessce.

    The pipeline filters cells and genes, computes QC metrics (flagging
    mitochondrial genes), normalizes and log-transforms the counts, selects
    highly variable genes, regresses out and scales that subset, builds one
    diamond-shaped polygon per observation from its spatial coordinates, and
    finally writes the optimized AnnData object to ``output_path``.

    Args:
        input_path: Path of the ``.h5ad`` file to read. The code reads
            ``var["feature_name"]``, ``obs["cell_type"]``,
            ``obsm["X_spatial"]`` and ``obsm["X_umap"]`` from it.
        output_path: Destination of the Zarr store.
        radius: Radius passed to ``to_diamond`` when building each
            observation's polygon. Defaults to 10, the value that was
            previously hard-coded.
    """
    adata = read_h5ad(input_path)

    # Drop low-quality cells and rarely detected genes.
    sc.pp.filter_cells(adata, min_genes=200)
    sc.pp.filter_genes(adata, min_cells=3)

    # Annotate the group of mitochondrial genes as 'mt'.
    adata.var["mt"] = adata.var["feature_name"].str.startswith("MT-")
    sc.pp.calculate_qc_metrics(
        adata, qc_vars=["mt"], percent_top=None, log1p=False, inplace=True
    )

    sc.pp.normalize_total(adata, target_sum=1e4, inplace=True)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)

    # Regress out and scale only the highly variable subset; keep it in obsm
    # both as-is and as a uint8 version.
    adata_hvg = adata[:, adata.var["highly_variable"]].copy()
    sc.pp.regress_out(adata_hvg, ["total_counts", "pct_counts_mt"])
    sc.pp.scale(adata_hvg, max_value=3)

    adata.obsm["X_hvg"] = adata_hvg.X
    adata.obsm["X_hvg_uint8"] = to_uint8(adata_hvg.X, norm_along="var")

    # Build the (num_cells, 4, 2) polygon array locally and attach it once,
    # instead of re-resolving adata.obsm[...] on every iteration.
    coords = adata.obsm["X_spatial"]
    num_cells = adata.obs.shape[0]
    segmentations = np.zeros((num_cells, 4, 2))
    for i in range(num_cells):
        segmentations[i, :, :] = to_diamond(coords[i, 0], coords[i, 1], radius)
    adata.obsm["X_segmentations"] = segmentations

    # Keep only the columns and obsm entries that are written to the store.
    adata = optimize_adata(
        adata,
        obs_cols=["cell_type"],
        var_cols=["feature_name"],
        obsm_keys=["X_hvg", "X_hvg_uint8", "X_umap", "X_spatial", "X_segmentations"],
        layer_keys=[],
    )
    adata.write_zarr(output_path, chunks=[adata.shape[0], 10])
/opt/miniconda3/envs/py310/lib/python3.10/site-packages/vitessce/__init__.py:42: UserWarning: Extra installs are necessary to use widgets: No module named 'anywidget'
warn(f'Extra installs are necessary to use widgets: {e}')
# Local output location of the Zarr store; the staged h5ad is the conversion input.
zarr_filepath = "./marshall_2022_iscience.anndata.zarr"
convert_h5ad_to_zarr(h5ad, zarr_filepath)
Save dataset¶
# Human-readable description stored with the artifact (reused later as the
# description of the Vitessce config).
zarr_description = (
    "Marshall et al., 2022 iScience. Spatial transcriptomics (Slide-seqV2) in the"
    " healthy human kidney (Puck_200903_13)"
)

# Register the converted Zarr store as an artifact and save it.
zarr_artifact = ln.Artifact(zarr_filepath, description=zarr_description)
zarr_artifact.save()
Create a VitessceConfig object¶
from vitessce import (
VitessceConfig,
Component as cm,
AnnDataWrapper,
)
# Top-level config; it reuses the description stored on the Zarr artifact.
vc = VitessceConfig(
    schema_version="1.0.15",
    name="Marshall et al., 2022 iScience",
    description=zarr_artifact.description,
)

# Register the dataset, then attach the Zarr store and tell Vitessce which
# paths inside it hold the matrix, embedding, locations, polygons and labels.
dataset = vc.add_dataset(name="marshall_2022")
dataset.add_object(
    AnnDataWrapper(
        adata_url=zarr_artifact.path.to_url(),
        obs_feature_matrix_path="X",
        obs_embedding_paths=["obsm/X_umap"],
        obs_embedding_names=["UMAP"],
        obs_locations_path="obsm/X_spatial",
        obs_segmentations_path="obsm/X_segmentations",
        obs_set_paths=["obs/cell_type"],
        obs_set_names=["Bead Type"],
        feature_labels_path="var/feature_name",
    )
)

# Views are created in this exact order; the order is reflected in the
# 'layout' list of the resulting config dict.
obs_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)
obs_set_sizes = vc.add_view(cm.OBS_SET_SIZES, dataset=dataset)
scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping="UMAP")
spatial = vc.add_view(cm.SPATIAL, dataset=dataset)
spatial_layers = vc.add_view(cm.LAYER_CONTROLLER, dataset=dataset)
genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)
description = vc.add_view(cm.DESCRIPTION, dataset=dataset)

# `|` places views side by side, `/` stacks rows vertically.
top_row = scatterplot | obs_sets | obs_set_sizes
bottom_row = spatial | spatial_layers | (genes | description)
vc.layout(top_row / bottom_row)

# Last expression of the cell: display the config as a plain dict.
vc.to_dict()
Show code cell output
{'version': '1.0.15',
'name': 'Marshall et al., 2022 iScience',
'description': 'Marshall et al., 2022 iScience. Spatial transcriptomics (Slide-seqV2) in the healthy human kidney (Puck_200903_13)',
'datasets': [{'uid': 'A',
'name': 'marshall_2022',
'files': [{'fileType': 'anndata.zarr',
'url': 'https://scverse-spatial-eu-central-1.s3-eu-central-1.amazonaws.com/.lamindb/XIG5GCp05YtUAn9M.anndata.zarr',
'options': {'obsLocations': {'path': 'obsm/X_spatial'},
'obsSegmentations': {'path': 'obsm/X_segmentations'},
'obsEmbedding': [{'path': 'obsm/X_umap',
'dims': [0, 1],
'embeddingType': 'UMAP'}],
'obsSets': [{'name': 'Bead Type', 'path': 'obs/cell_type'}],
'obsFeatureMatrix': {'path': 'X'},
'featureLabels': {'path': 'var/feature_name'}}}]}],
'coordinationSpace': {'dataset': {'A': 'A'}, 'embeddingType': {'A': 'UMAP'}},
'layout': [{'component': 'obsSets',
'coordinationScopes': {'dataset': 'A'},
'x': 3.0,
'y': 0.0,
'w': 3.0,
'h': 6.0},
{'component': 'obsSetSizes',
'coordinationScopes': {'dataset': 'A'},
'x': 6.0,
'y': 0.0,
'w': 6.0,
'h': 6.0},
{'component': 'scatterplot',
'coordinationScopes': {'dataset': 'A', 'embeddingType': 'A'},
'x': 0.0,
'y': 0.0,
'w': 3.0,
'h': 6.0},
{'component': 'spatial',
'coordinationScopes': {'dataset': 'A'},
'x': 0.0,
'y': 6.0,
'w': 3.0,
'h': 6.0},
{'component': 'layerController',
'coordinationScopes': {'dataset': 'A'},
'x': 3.0,
'y': 6.0,
'w': 3.0,
'h': 6.0},
{'component': 'featureList',
'coordinationScopes': {'dataset': 'A'},
'x': 6.0,
'y': 6.0,
'w': 3.0,
'h': 6.0},
{'component': 'description',
'coordinationScopes': {'dataset': 'A'},
'x': 9.0,
'y': 6.0,
'w': 3.0,
'h': 6.0}],
'initStrategy': 'auto'}
Save VitessceConfig object¶
from lamindb.integrations import save_vitessce_config
# Persist the Vitessce config through LaminDB; per the cell output this
# uploads it as config.vitessce.json and prints a link to the artifact.
vc_artifact = save_vitessce_config(vc, description="Marshall et al., 2022 iScience")
... uploading config.vitessce.json: 100.0%
💡 go to: https://lamin.ai/scverse/spatial/artifact/sKxat1sh8unY0n0IkqNb
# Close the tracked run. Per the cell output, this uploads the notebook
# source, the run's pip environment, and an HTML report of the run.
ln.finish()
❗ cells [(9, 11)] were not run consecutively
... uploading 2024-04-02-marshall-2022.ipynb: 100.0%
✅ saved transform.source_code: Artifact(uid='uOM8vBTTl3u72AyybkwW', suffix='.ipynb', description='Source of transform VwUYGOmA3koC65cN', version='2', size=7404, hash='NXzsZfmqWWpsTk-wIDa8Yg', hash_type='md5', visibility=0, key_is_virtual=True, updated_at=2024-04-23 13:55:20 UTC, storage_id=2, created_by_id=2)
... uploading run_env_pip_v6SbqImFKgQZdOBlsUcW.txt: 100.0%
✅ saved run.environment: Artifact(uid='iWaFLxrJN9TDrBZhCqrf', suffix='.txt', description='requirements.txt', size=10914, hash='WMhSDgtH_xsYI0nHNDNzuA', hash_type='md5', visibility=0, key_is_virtual=True, updated_at=2024-04-23 13:55:20 UTC, storage_id=2, created_by_id=2)
... uploading 2024-04-02-marshall-2022.html: 100.0%
✅ saved transform.latest_report: Artifact(uid='xGaILnIF0wJNiu6Evv7a', suffix='.html', description='Report of run v6SbqImFKgQZdOBlsUcW', version='2', size=301331, hash='KF9Z0Cc9eCDFuuxS9hsPRg', hash_type='md5', visibility=0, key_is_virtual=True, updated_at=2024-04-23 13:55:20 UTC, storage_id=2, created_by_id=2)
✅ go to: https://lamin.ai/scverse/spatial/transform/VwUYGOmA3koC65cN