marshall-2022

# Shell command (IPython `!` magic): connect to the scverse/spatial lamindb instance.
!lamin load scverse/spatial
💡 connected lamindb: scverse/spatial
import lamindb as ln

# Pin the uid that identifies this notebook, then start tracking; the output
# below shows the matching Transform and Run records being loaded.
ln.context.uid = "VwUYGOmA3koC0000"
ln.context.track()
💡 connected lamindb: scverse/spatial
💡 notebook imports: anndata==0.10.7 lamindb==0.70.3 numpy==1.26.4 scanpy==1.10.1 vitessce==3.2.5
💡 loaded: Transform(uid='VwUYGOmA3koC65cN', name='marshall-2022', key='2024-04-02-marshall-2022', version='2', type='notebook', updated_at=2024-04-23 12:40:36 UTC, created_by_id=2)
💡 loaded: Run(uid='v6SbqImFKgQZdOBlsUcW', started_at=2024-04-23 13:45:52 UTC, is_consecutive=True, transform_id=20, created_by_id=2)

Download and process data

The h5ad file is downloaded from: https://lamin.ai/laminlabs/cellxgene/artifacts/zuSDjhCBRxYCOm8pY6SL

# Look up the source artifact by uid in the laminlabs/cellxgene instance
# (a different instance from the one this notebook is connected to).
h5ad_artifact = (
    ln.Artifact.using("laminlabs/cellxgene").filter(uid="zuSDjhCBRxYCOm8pY6SL").one()
)
# The staged result is later passed to convert_h5ad_to_zarr as its input path.
# NOTE(review): presumably stage() downloads to a local cache and returns that
# path -- confirm against the lamindb documentation.
h5ad = h5ad_artifact.stage()

Convert h5ad to zarr.

The script below is taken from: https://github.com/vitessce/vitessce-python/blob/main/demos/marshall-2022/src/convert_to_zarr.py

from anndata import read_h5ad
import numpy as np
import scanpy as sc
from vitessce.data_utils import (
    to_diamond,
    to_uint8,
    optimize_adata,
)


def convert_h5ad_to_zarr(input_path, output_path, radius=10):
    """Preprocess an h5ad dataset and write it out as a zarr store for Vitessce.

    The function reads the AnnData object at ``input_path``, filters and
    normalizes it with scanpy, derives a scaled highly-variable-gene matrix,
    builds one diamond-shaped polygon per observation from its spatial
    coordinates, trims the object with ``optimize_adata`` and writes the
    result to ``output_path``.

    The input is expected to provide ``var["feature_name"]``,
    ``obs["cell_type"]``, ``obsm["X_spatial"]`` and ``obsm["X_umap"]``;
    these are the fields read or exported below.

    Args:
        input_path: Location of the source ``.h5ad`` file.
        output_path: Destination of the zarr store.
        radius: Radius handed to ``to_diamond`` for every observation, in the
            same units as ``obsm["X_spatial"]``. Defaults to 10.
    """
    adata = read_h5ad(input_path)

    # Basic QC: drop observations with fewer than 200 detected genes and
    # genes detected in fewer than 3 observations.
    sc.pp.filter_cells(adata, min_genes=200)
    sc.pp.filter_genes(adata, min_cells=3)

    # Flag mitochondrial genes (names starting with "MT-") so that the QC
    # metrics include "pct_counts_mt", which is regressed out further down.
    adata.var["mt"] = adata.var["feature_name"].str.startswith("MT-")
    sc.pp.calculate_qc_metrics(
        adata, qc_vars=["mt"], percent_top=None, log1p=False, inplace=True
    )

    sc.pp.normalize_total(adata, target_sum=1e4, inplace=True)
    sc.pp.log1p(adata)

    sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)

    # Regress out and scale on a copy restricted to the highly variable
    # genes, so that adata.X itself is left log-normalized.
    adata_hvg = adata[:, adata.var["highly_variable"]].copy()
    sc.pp.regress_out(adata_hvg, ["total_counts", "pct_counts_mt"])
    sc.pp.scale(adata_hvg, max_value=3)

    adata.obsm["X_hvg"] = adata_hvg.X
    adata.obsm["X_hvg_uint8"] = to_uint8(adata_hvg.X, norm_along="var")

    # One 4-vertex polygon per observation, centred on its spatial position.
    # The array is filled locally and stored once, so the result does not
    # depend on obsm handing back the stored array for in-place mutation.
    num_cells = adata.obs.shape[0]
    spatial = adata.obsm["X_spatial"]
    segmentations = np.zeros((num_cells, 4, 2))
    for i in range(num_cells):
        segmentations[i, :, :] = to_diamond(spatial[i, 0], spatial[i, 1], radius)
    adata.obsm["X_segmentations"] = segmentations

    # Keep only the columns and obsm entries that are exported.
    adata = optimize_adata(
        adata,
        obs_cols=["cell_type"],
        var_cols=["feature_name"],
        obsm_keys=["X_hvg", "X_hvg_uint8", "X_umap", "X_spatial", "X_segmentations"],
        layer_keys=[],
    )

    # Each chunk spans every observation and 10 features.
    adata.write_zarr(output_path, chunks=[adata.shape[0], 10])
/opt/miniconda3/envs/py310/lib/python3.10/site-packages/vitessce/__init__.py:42: UserWarning: Extra installs are necessary to use widgets: No module named 'anywidget'
  warn(f'Extra installs are necessary to use widgets: {e}')
# Local output location for the converted store (relative to the working directory).
zarr_filepath = "./marshall_2022_iscience.anndata.zarr"
convert_h5ad_to_zarr(h5ad, zarr_filepath)

Save dataset

# Register the local zarr store as an artifact; the description is reused
# further down as the Vitessce config description.
zarr_artifact = ln.Artifact(
    zarr_filepath,
    description=(
        "Marshall et al., 2022 iScience. Spatial transcriptomics (Slide-seqV2) in the"
        " healthy human kidney (Puck_200903_13)"
    ),
)
# NOTE(review): saving presumably uploads the store to the instance's storage;
# the URL in the config output further down points at an S3 bucket.
zarr_artifact.save()

Create a VitessceConfig object

from vitessce import (
    VitessceConfig,
    Component as cm,
    AnnDataWrapper,
)

# Top-level Vitessce configuration; it reuses the artifact's description.
vc = VitessceConfig(
    schema_version="1.0.15",
    name="Marshall et al., 2022 iScience",
    description=zarr_artifact.description,
)

# Point the dataset at the saved zarr store by URL and declare which paths
# inside the store hold the matrix, embedding, locations, segmentations,
# observation sets and feature labels. These paths match what
# convert_h5ad_to_zarr exports.
dataset = vc.add_dataset(name="marshall_2022").add_object(
    AnnDataWrapper(
        adata_url=zarr_artifact.path.to_url(),
        obs_feature_matrix_path="X",
        obs_embedding_paths=["obsm/X_umap"],
        obs_embedding_names=["UMAP"],
        obs_locations_path="obsm/X_spatial",
        obs_segmentations_path="obsm/X_segmentations",
        obs_set_paths=["obs/cell_type"],
        obs_set_names=["Bead Type"],
        feature_labels_path="var/feature_name",
    )
)

# One view per component, all bound to the same dataset.
obs_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)
obs_set_sizes = vc.add_view(cm.OBS_SET_SIZES, dataset=dataset)
# `mapping` selects the embedding declared above under the name "UMAP".
scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping="UMAP")
spatial = vc.add_view(cm.SPATIAL, dataset=dataset)
spatial_layers = vc.add_view(cm.LAYER_CONTROLLER, dataset=dataset)
genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)
description = vc.add_view(cm.DESCRIPTION, dataset=dataset)
# `|` places views side by side and `/` stacks rows, as the x/y values in the
# output below show: embedding and set views on the top row; spatial, layer
# controller, feature list and description on the bottom row.
vc.layout(
    (scatterplot | obs_sets | obs_set_sizes)
    / (spatial | spatial_layers | (genes | description))
)

# Display the resulting configuration as a plain dict for inspection.
vc.to_dict()
Hide code cell output
{'version': '1.0.15',
 'name': 'Marshall et al., 2022 iScience',
 'description': 'Marshall et al., 2022 iScience. Spatial transcriptomics (Slide-seqV2) in the healthy human kidney (Puck_200903_13)',
 'datasets': [{'uid': 'A',
   'name': 'marshall_2022',
   'files': [{'fileType': 'anndata.zarr',
     'url': 'https://scverse-spatial-eu-central-1.s3-eu-central-1.amazonaws.com/.lamindb/XIG5GCp05YtUAn9M.anndata.zarr',
     'options': {'obsLocations': {'path': 'obsm/X_spatial'},
      'obsSegmentations': {'path': 'obsm/X_segmentations'},
      'obsEmbedding': [{'path': 'obsm/X_umap',
        'dims': [0, 1],
        'embeddingType': 'UMAP'}],
      'obsSets': [{'name': 'Bead Type', 'path': 'obs/cell_type'}],
      'obsFeatureMatrix': {'path': 'X'},
      'featureLabels': {'path': 'var/feature_name'}}}]}],
 'coordinationSpace': {'dataset': {'A': 'A'}, 'embeddingType': {'A': 'UMAP'}},
 'layout': [{'component': 'obsSets',
   'coordinationScopes': {'dataset': 'A'},
   'x': 3.0,
   'y': 0.0,
   'w': 3.0,
   'h': 6.0},
  {'component': 'obsSetSizes',
   'coordinationScopes': {'dataset': 'A'},
   'x': 6.0,
   'y': 0.0,
   'w': 6.0,
   'h': 6.0},
  {'component': 'scatterplot',
   'coordinationScopes': {'dataset': 'A', 'embeddingType': 'A'},
   'x': 0.0,
   'y': 0.0,
   'w': 3.0,
   'h': 6.0},
  {'component': 'spatial',
   'coordinationScopes': {'dataset': 'A'},
   'x': 0.0,
   'y': 6.0,
   'w': 3.0,
   'h': 6.0},
  {'component': 'layerController',
   'coordinationScopes': {'dataset': 'A'},
   'x': 3.0,
   'y': 6.0,
   'w': 3.0,
   'h': 6.0},
  {'component': 'featureList',
   'coordinationScopes': {'dataset': 'A'},
   'x': 6.0,
   'y': 6.0,
   'w': 3.0,
   'h': 6.0},
  {'component': 'description',
   'coordinationScopes': {'dataset': 'A'},
   'x': 9.0,
   'y': 6.0,
   'w': 3.0,
   'h': 6.0}],
 'initStrategy': 'auto'}

Save VitessceConfig object

from lamindb.integrations import save_vitessce_config
# Save the Vitessce configuration through the lamindb integration; the output
# below shows config.vitessce.json being uploaded.
vc_artifact = save_vitessce_config(vc, description="Marshall et al., 2022 iScience")
... uploading config.vitessce.json: 100.0%
💡 go to: https://lamin.ai/scverse/spatial/artifact/sKxat1sh8unY0n0IkqNb
# Close the tracked run; the output below shows the notebook source, the run
# environment and an HTML report being saved.
ln.finish()
❗ cells [(9, 11)] were not run consecutively
... uploading 2024-04-02-marshall-2022.ipynb: 100.0%
✅ saved transform.source_code: Artifact(uid='uOM8vBTTl3u72AyybkwW', suffix='.ipynb', description='Source of transform VwUYGOmA3koC65cN', version='2', size=7404, hash='NXzsZfmqWWpsTk-wIDa8Yg', hash_type='md5', visibility=0, key_is_virtual=True, updated_at=2024-04-23 13:55:20 UTC, storage_id=2, created_by_id=2)
... uploading run_env_pip_v6SbqImFKgQZdOBlsUcW.txt: 100.0%
✅ saved run.environment: Artifact(uid='iWaFLxrJN9TDrBZhCqrf', suffix='.txt', description='requirements.txt', size=10914, hash='WMhSDgtH_xsYI0nHNDNzuA', hash_type='md5', visibility=0, key_is_virtual=True, updated_at=2024-04-23 13:55:20 UTC, storage_id=2, created_by_id=2)
... uploading 2024-04-02-marshall-2022.html: 100.0%
✅ saved transform.latest_report: Artifact(uid='xGaILnIF0wJNiu6Evv7a', suffix='.html', description='Report of run v6SbqImFKgQZdOBlsUcW', version='2', size=301331, hash='KF9Z0Cc9eCDFuuxS9hsPRg', hash_type='md5', visibility=0, key_is_virtual=True, updated_at=2024-04-23 13:55:20 UTC, storage_id=2, created_by_id=2)
✅ go to: https://lamin.ai/scverse/spatial/transform/VwUYGOmA3koC65cN