Skip to content

Commit

Permalink
Merge pull request #589 from dandi/temp-test-girder
Browse files Browse the repository at this point in the history
a script to validate dandi-api collection listing against girder
  • Loading branch information
yarikoptic authored Apr 26, 2021
2 parents 6c61f1e + 20dffc9 commit 242c5af
Showing 1 changed file with 80 additions and 0 deletions.
80 changes: 80 additions & 0 deletions tools/validate-api-against-girder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python3
import click
import requests

from dandi.dandiapi import DandiAPIClient
from dandi.dandiset import APIDandiset
from dandi.girder import GirderCli


def adhoc_list_girder(_id, client, prefix=""):
    """Recursively list the files stored under a girder folder.

    Pure girder API has no recursive listing, so the tree is walked manually:
    items directly under ``_id`` are reported first, then every sub-folder is
    descended into with ``"<folder name>/"`` appended to ``prefix``.

    Parameters
    ----------
    _id
        Identifier of the girder folder to list.
    client
        Object providing ``listItem(folder_id)``, ``listFile(item_id)`` and
        ``listFolder(parent_id, parent_type)``, each returning an iterable of
        dict-like records.
    prefix : str, optional
        Path prepended to every reported item name.

    Yields
    ------
    tuple
        ``(path, size)`` for each item that holds exactly one file.  Items
        without files are reported on stdout and skipped; items with several
        files are reported, the debugger is entered, and they are skipped.
    """
    for item in client.listItem(_id):
        assert item.get("_modelType", None) == "item"
        files = list(client.listFile(item["_id"]))
        if not files:
            print(f" Empty item with prefix={prefix}: {item}")
            continue
        if len(files) != 1:
            print("Multiple files for an item still found!")
            print(files)
            import pdb

            pdb.set_trace()
            # There is no single file to check the size against, so do not
            # fall through to the comparison below with a list in hand.
            continue
        # The item and its only file must agree on the size.
        assert files[0]["size"] == item["size"]
        yield (f"{prefix}{item['name']}", item["size"])

    for folder in client.listFolder(_id, "folder"):
        assert folder.get("_modelType", None) == "folder"
        yield from adhoc_list_girder(
            folder["_id"], client, f"{prefix}{folder['name']}/"
        )


@click.command()
@click.option(
    "-d",
    "--dandiset",
    "only_dandiset",
    default="000026",
    show_default=True,
    help="Validate only this dandiset; pass an empty string to validate all.",
)
def main(only_dandiset):
    """Validate the dandi-api draft asset listing against girder.

    For each selected dandiset found in girder's "drafts" collection the
    (path, size) pairs of its assets are gathered three ways: through
    GirderCli.get_dandiset_and_assets, through the manual recursive
    adhoc_list_girder walk, and through DandiAPIClient for the "draft"
    version.  Any disagreement is printed and the debugger is entered.
    """
    g_client = GirderCli("http://3.19.164.171")
    a_client = DandiAPIClient("https://api.dandiarchive.org/api")

    with a_client.session():
        g_client.dandi_authenticate()
        # gather all dandisets known to girder: hardcoded _id for "drafts" collection
        g_dandisets = list(
            g_client.listFolder("5e59bb0af19e820ab6ea6c62", "collection")
        )
        for g_dandiset in g_dandisets:
            dandiset = g_dandiset["name"]
            girder_id = g_dandiset["_id"]
            # An empty selection disables the filter; the default keeps the
            # original single-dandiset behaviour.
            if only_dandiset and dandiset != only_dandiset:
                continue
            print(f"DANDI:{dandiset}", end="\t")
            # Only the assets are compared; the dandiset metadata is unused.
            _, g_assets = g_client.get_dandiset_and_assets(girder_id, "folder")
            # harmonize and get only what we care about ATM - path and size,
            # or otherwise we would need to query each asset for metadata
            g_assets_h = {(a["path"].lstrip("/"), a["size"]) for a in g_assets}

            # Yarik trusts nobody. Two identical bugs are less likely!
            g_assets_adhoc = set(adhoc_list_girder(girder_id, g_client))

            if g_assets_h != g_assets_adhoc:
                print("ad-hoc and dandi listing of girder differs!")
                import pdb

                pdb.set_trace()

            _, a_assets_ = a_client.get_dandiset_and_assets(dandiset, "draft")
            # Materialized because the count is reported below.
            a_assets = list(a_assets_)
            a_assets_h = {(a["path"].lstrip("/"), a["size"]) for a in a_assets}

            if a_assets_h != g_assets_h:
                print("differs")
                import pdb

                pdb.set_trace()
            else:
                print(f"{len(a_assets)} assets the same")


if __name__ == "__main__":
    # Run the validation only when executed as a script, not when imported.
    main()

0 comments on commit 242c5af

Please sign in to comment.