diff --git a/Cargo.lock b/Cargo.lock index 829e825..1e69294 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -81,7 +81,7 @@ dependencies = [ "const-random", "getrandom", "once_cell", - "version_check", + "version_check 0.9.5", "zerocopy", ] @@ -130,6 +130,55 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +dependencies = [ + "anstyle", + "windows-sys 0.59.0", +] + [[package]] name = "anyhow" version = "1.0.95" @@ -142,7 +191,7 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ceb7c683b2f8f40970b70e39ff8be514c95b96fcb9c4af87e1ed2cb2e10801a0" dependencies = [ - "bzip2", + "bzip2 0.4.4", "crc32fast", "digest", "lazy_static", @@ -298,7 +347,7 @@ dependencies = [ "arrow-schema", "arrow-select", "atoi", - "base64", + "base64 0.22.1", 
"chrono", "comfy-table", "half", @@ -408,7 +457,7 @@ version = "53.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b95513080e728e4cec37f1ff5af4f12c9688d47795d17cda80b6ec2cf74d4678" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", ] [[package]] @@ -460,7 +509,7 @@ version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522" dependencies = [ - "bzip2", + "bzip2 0.4.4", "flate2", "futures-core", "futures-io", @@ -500,7 +549,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -511,7 +560,7 @@ checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -535,6 +584,20 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "attohttpc" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e57d6e7a84f33ff3316e97af3180fe7f86597a6a60161c0be70c0e45f382620" +dependencies = [ + "http 0.2.12", + "log", + "rustls 0.21.12", + "rustls-native-certs 0.6.3", + "url", + "webpki", +] + [[package]] name = "autocfg" version = "1.4.0" @@ -555,6 +618,303 @@ dependencies = [ "snap", ] +[[package]] +name = "aws-config" +version = "1.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c03a50b30228d3af8865ce83376b4e99e1ffa34728220fe2860e4df0bb5278d6" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", 
+ "fastrand", + "hex", + "http 0.2.12", + "ring", + "time", + "tokio", + "tracing", + "url", + "zeroize", +] + +[[package]] +name = "aws-credential-types" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60e8f6b615cb5fc60a98132268508ad104310f0cfb25a1c22eee76efdf9154da" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + +[[package]] +name = "aws-runtime" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b16d1aa50accc11a4b4d5c50f7fb81cc0cf60328259c587d0e6b0f11385bde46" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http-body 0.4.6", + "once_cell", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", +] + +[[package]] +name = "aws-sdk-sso" +version = "1.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1605dc0bf9f0a4b05b451441a17fcb0bda229db384f23bf5cead3adbab0664ac" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "http 0.2.12", + "once_cell", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-ssooidc" +version = "1.54.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59f3f73466ff24f6ad109095e0f3f2c830bfb4cd6c8b12f744c8e61ebf4d3ba1" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "http 0.2.12", + "once_cell", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sts" +version = 
"1.54.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "249b2acaa8e02fd4718705a9494e3eb633637139aa4bb09d70965b0448e865db" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "http 0.2.12", + "once_cell", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "1.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d3820e0c08d0737872ff3c7c1f21ebbb6693d832312d6152bf18ef50a5471c2" +dependencies = [ + "aws-credential-types", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "form_urlencoded", + "hex", + "hmac", + "http 0.2.12", + "http 1.2.0", + "once_cell", + "percent-encoding", + "sha2", + "time", + "tracing", +] + +[[package]] +name = "aws-smithy-async" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "427cb637d15d63d6f9aae26358e1c9a9c09d5aa490d64b09354c8217cfef0f28" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "aws-smithy-http" +version = "0.60.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c8bc3e8fdc6b8d07d976e301c02fe553f72a39b7a9fea820e023268467d7ab6" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http-body 0.4.6", + "once_cell", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.61.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee4e69cc50921eb913c6b662f8d909131bb3e6ad6cb6090d3a39b66fc5c52095" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-query" +version = "0.60.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a05dd41a70fc74051758ee75b5c4db2c0ca070ed9229c3df50e9475cda1cb985" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", + "http-body 1.0.1", + "httparse", + "hyper 0.14.32", + "hyper-rustls 0.24.2", + "once_cell", + "pin-project-lite", + "pin-utils", + "rustls 0.21.12", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92165296a47a812b267b4f41032ff8069ab7ff783696d217f0994a0d7ab585cd" +dependencies = [ + "aws-smithy-async", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.2.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-types" +version = "1.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38ddc9bd6c28aeb303477170ddd183760a956a03e083b3902a990238a7e3792d" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "http 0.2.12", + "http 1.2.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab0b0166827aa700d3dc519f72f8b3a91c35d0b8d042dc5d643a91e6f80648fc" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5221b91b3e441e6675310829fd8984801b772cb1546ef6c0e54dec9f1ac13fef" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "rustc_version 0.4.1", + "tracing", +] + [[package]] name = "backtrace" version = "0.3.74" @@ -570,12 +930,65 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + +[[package]] +name = "bigtools" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91f1c37f11670ccdb1b49313e8244be46543d06e9e68aae3d182796d8f78f256" +dependencies = [ + "anyhow", + "attohttpc", + "bincode", + "byteorder", + "byteordered", + "bytes", + "clap", + "crossbeam-channel", + "crossbeam-utils", + "futures", + "index_list", + "itertools 0.10.5", + "libdeflater", + "ryu", + "serde", + "smallvec", + "tempfile", + "thiserror 1.0.69", + "tokio", + "ufmt", +] + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bio" version = "2.0.3" @@ -593,7 +1006,7 @@ dependencies = [ "editdistancek", "enum-map", "fxhash", - "itertools", + "itertools 0.13.0", "itertools-num", "lazy_static", "multimap", @@ -651,9 +1064,9 @@ checksum = 
"bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "1be3f42a67d6d345ecd59f675f3f012d6974981560836e938c22b424b85ce1be" dependencies = [ "serde", ] @@ -719,6 +1132,17 @@ dependencies = [ "alloc-stdlib", ] +[[package]] +name = "bstr" +version = "1.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.16.0" @@ -758,7 +1182,7 @@ checksum = "3fa76293b4f7bb636ab88fd78228235b5248b4d05cc589aed610f954af5d7c7a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -767,12 +1191,31 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "byteordered" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbf2cd9424f5ff404aba1959c835cbc448ee8b689b870a9981c76c0fd46280e6" +dependencies = [ + "byteorder", +] + [[package]] name = "bytes" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + [[package]] name = "bzip2" version = "0.4.4" @@ -783,6 +1226,16 @@ dependencies = [ "libc", ] +[[package]] +name = "bzip2" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58" +dependencies = [ + "bzip2-sys", + "libc", +] + [[package]] name = "bzip2-sys" version = "0.1.11+1.0.8" @@ -887,6 +1340,52 @@ dependencies = [ "half", ] +[[package]] +name = "circular" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fc239e0f6cb375d2402d48afb92f76f5404fd1df208a41930ec81eda078bea" + +[[package]] +name = "clap" +version = "4.5.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8eb5e908ef3a6efbe1ed62520fb7287959888c88485abe072543190ecc66783" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b01801b5fc6a0a232407abc821660c9c6d25a1cafc0d4f85f29fb8d9afc121" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.96", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + [[package]] name = "clipboard-win" version = "5.4.0" @@ -911,6 +1410,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240f9610db0e586042f50260506972820ef10d5eb9a0e867a00f8cfe0a238be3" +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + [[package]] name = "comfy-table" version = "7.1.3" @@ -970,6 +1475,16 @@ version = "0.3.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation" version = "0.10.0" @@ -1092,7 +1607,7 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "crossterm_winapi", "parking_lot", "rustix", @@ -1186,7 +1701,7 @@ dependencies = [ "async-compression", "async-trait", "bytes", - "bzip2", + "bzip2 0.4.4", "chrono", "dashmap", "datafusion-catalog", @@ -1210,7 +1725,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.13.0", "log", "num-traits", "num_cpus", @@ -1335,7 +1850,7 @@ checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" dependencies = [ "arrow", "datafusion-common", - "itertools", + "itertools 0.13.0", "paste", ] @@ -1365,7 +1880,7 @@ checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" dependencies = [ "arrow", "arrow-buffer", - "base64", + "base64 0.22.1", "blake2", "blake3", "chrono", @@ -1374,7 +1889,7 @@ dependencies = [ "datafusion-expr", "hashbrown 0.14.5", "hex", - "itertools", + "itertools 0.13.0", "log", "md-5", "rand", @@ -1436,7 +1951,7 @@ dependencies = [ "datafusion-functions", "datafusion-functions-aggregate", "datafusion-physical-expr-common", - "itertools", + "itertools 0.13.0", "log", "paste", "rand", @@ -1481,7 +1996,7 @@ dependencies = [ "datafusion-physical-expr", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.13.0", "log", "paste", "regex-syntax", @@ -1509,7 +2024,7 @@ dependencies = [ "half", 
"hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.13.0", "log", "paste", "petgraph", @@ -1542,7 +2057,7 @@ dependencies = [ "datafusion-expr-common", "datafusion-physical-expr", "datafusion-physical-plan", - "itertools", + "itertools 0.13.0", ] [[package]] @@ -1571,7 +2086,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "itertools", + "itertools 0.13.0", "log", "once_cell", "parking_lot", @@ -1650,6 +2165,15 @@ dependencies = [ "strum 0.26.3", ] +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + [[package]] name = "derive-new" version = "0.6.0" @@ -1658,7 +2182,7 @@ checksum = "d150dea618e920167e5973d70ae6ece4385b7164e0d799fe7c122dd0a5d912ad" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -1680,7 +2204,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -1736,7 +2260,7 @@ checksum = "f282cfdfe92516eb26c2af8589c274c7c17681f5ecc03c18255fe741c6aa64eb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -1748,7 +2272,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -1757,6 +2281,20 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "err-derive" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34a887c8df3ed90498c1c437ce21f211c8e27672921a8ffa293cb8d6d4caa9e" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "rustversion", + "syn 1.0.109", + "synstructure 0.12.6", +] + [[package]] name = "errno" version = 
"0.3.10" @@ -1779,6 +2317,284 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c" +[[package]] +name = "exon" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "async-trait", + "bytes", + "datafusion", + "emojis", + "exon-bam", + "exon-bcf", + "exon-bed", + "exon-bigwig", + "exon-common", + "exon-cram", + "exon-fasta", + "exon-fastq", + "exon-fcs", + "exon-genbank", + "exon-gff", + "exon-gtf", + "exon-io", + "exon-mzml", + "exon-sam", + "exon-sdf", + "exon-vcf", + "futures", + "fxhash", + "itertools 0.13.0", + "lazy_static", + "noodles", + "num_cpus", + "object_store", + "pin-project", + "regex", + "sequila-core", + "serde", + "tokio", + "tokio-util", + "tracing", + "tracing-subscriber", + "url", +] + +[[package]] +name = "exon-bam" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "exon-common", + "exon-sam", + "futures", + "noodles", + "object_store", + "tokio", +] + +[[package]] +name = "exon-bcf" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "exon-common", + "exon-vcf", + "futures", + "noodles", + "object_store", + "tokio", +] + +[[package]] +name = "exon-bed" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "bstr", + "exon-common", + "futures", + "noodles", + "object_store", + "tokio", +] + +[[package]] +name = "exon-bigwig" +version = "0.32.4" +source = 
"git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "bigtools", + "exon-common", + "futures", + "noodles", + "object_store", + "tokio", +] + +[[package]] +name = "exon-common" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "datafusion", + "futures", + "glob", + "object_store", + "url", +] + +[[package]] +name = "exon-cram" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "coitrees", + "exon-common", + "exon-sam", + "futures", + "noodles", + "object_store", + "tokio", + "tracing", +] + +[[package]] +name = "exon-fasta" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "exon-common", + "futures", + "noodles", + "object_store", + "tokio", +] + +[[package]] +name = "exon-fastq" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "exon-common", + "futures", + "noodles", + "object_store", + "tokio", +] + +[[package]] +name = "exon-fcs" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "byteorder", + "exon-common", + "futures", + "object_store", + "tokio", +] + +[[package]] +name = "exon-genbank" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + 
"arrow", + "exon-common", + "futures", + "gb-io", + "object_store", + "tokio", +] + +[[package]] +name = "exon-gff" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "exon-common", + "futures", + "noodles", + "object_store", + "tokio", +] + +[[package]] +name = "exon-gtf" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "exon-common", + "futures", + "noodles", + "object_store", + "tokio", +] + +[[package]] +name = "exon-io" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "async-trait", + "aws-config", + "aws-credential-types", + "object_store", + "tokio", + "url", +] + +[[package]] +name = "exon-mzml" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "base64 0.22.1", + "byteorder", + "exon-common", + "flate2", + "futures", + "object_store", + "quick-xml", + "serde", + "tokio", +] + +[[package]] +name = "exon-sam" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "exon-common", + "futures", + "noodles", + "object_store", + "tokio", +] + +[[package]] +name = "exon-sdf" +version = "0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "exon-common", + "futures", + "object_store", + "regex", + "tokio", + "tracing", +] + +[[package]] +name = "exon-vcf" +version = 
"0.32.4" +source = "git+https://github.com/mwiewior/exon.git?rev=c543c03937ce5c8f249a77e45a28d7138e0a9c0f#c543c03937ce5c8f249a77e45a28d7138e0a9c0f" +dependencies = [ + "arrow", + "exon-common", + "futures", + "noodles", + "object_store", + "tokio", +] + [[package]] name = "fallible-streaming-iterator" version = "0.1.9" @@ -1910,6 +2726,7 @@ dependencies = [ "futures-core", "futures-task", "futures-util", + "num_cpus", ] [[package]] @@ -1926,7 +2743,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -1968,6 +2785,23 @@ dependencies = [ "byteorder", ] +[[package]] +name = "gb-io" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "387616b1e67a2c326231a86f1a62fb98e94f1da7c089817a00e332a96c2dbc21" +dependencies = [ + "circular", + "err-derive", + "itertools 0.10.5", + "log", + "nom", + "serde", + "serde_bytes", + "string_cache", + "string_cache_codegen", +] + [[package]] name = "generational-arena" version = "0.2.9" @@ -1984,7 +2818,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", - "version_check", + "version_check 0.9.5", ] [[package]] @@ -2022,6 +2856,25 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +[[package]] +name = "h2" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "h2" version = "0.4.7" @@ -2033,7 +2886,7 @@ dependencies = [ "fnv", 
"futures-core", "futures-sink", - "http", + "http 1.2.0", "indexmap", "slab", "tokio", @@ -2111,6 +2964,15 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + [[package]] name = "home" version = "0.5.11" @@ -2120,6 +2982,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http" version = "1.2.0" @@ -2131,6 +3004,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -2138,7 +3022,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.2.0", ] [[package]] @@ -2149,8 +3033,8 @@ checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", "futures-util", - "http", - "http-body", + "http 1.2.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -2160,12 +3044,42 @@ version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" +[[package]] +name = "httpdate" +version = "1.0.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "humantime" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.5.2" @@ -2175,9 +3089,9 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "h2", - "http", - "http-body", + "h2 0.4.7", + "http 1.2.0", + "http-body 1.0.1", "httparse", "itoa", "pin-project-lite", @@ -2186,6 +3100,22 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "rustls-native-certs 0.6.3", + "tokio", + "tokio-rustls 0.24.1", +] + [[package]] name = "hyper-rustls" version = "0.27.5" @@ -2193,14 +3123,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2" dependencies = [ "futures-util", - "http", - "hyper", + "http 1.2.0", + "hyper 1.5.2", "hyper-util", - "rustls", - "rustls-native-certs", + "rustls 0.23.21", + "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.1", "tower-service", ] @@ -2213,9 +3143,9 @@ 
dependencies = [ "bytes", "futures-channel", "futures-util", - "http", - "http-body", - "hyper", + "http 1.2.0", + "http-body 1.0.1", + "hyper 1.5.2", "pin-project-lite", "socket2", "tokio", @@ -2361,7 +3291,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -2385,6 +3315,12 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "index_list" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa38453685e5fe724fd23ff6c1a158c1e2ca21ce0c2718fa11e96e70e99fd4de" + [[package]] name = "indexmap" version = "2.7.0" @@ -2435,6 +3371,21 @@ version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -2571,6 +3522,24 @@ version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +[[package]] +name = "libdeflate-sys" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c301042beb41d94bc0f8dc667712f8fa8c42d3ea058dd7a71bed3fee8370c75e" +dependencies = [ + "cc", +] + +[[package]] +name = "libdeflater" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea90e8df8addcafac4c8737aabf1597f2e83320d58552d8594a52f74cbf24d2e" +dependencies 
= [ + "libdeflate-sys", +] + [[package]] name = "libflate" version = "1.4.0" @@ -2864,7 +3833,7 @@ checksum = "254a5372af8fc138e36684761d3c0cdb758a4410e938babcff1c860ce14ddbfc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -2882,6 +3851,12 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "newtype_derive" version = "0.1.6" @@ -2891,6 +3866,253 @@ dependencies = [ "rustc_version 0.1.7", ] +[[package]] +name = "nom" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" +dependencies = [ + "memchr", + "version_check 0.1.5", +] + +[[package]] +name = "noodles" +version = "0.87.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3af819286693ced8c3d26c0bb1c84473767e48489fe1d75c8563966fb1c9fe08" +dependencies = [ + "noodles-bam", + "noodles-bcf", + "noodles-bed", + "noodles-bgzf", + "noodles-core", + "noodles-cram", + "noodles-csi", + "noodles-fasta", + "noodles-fastq", + "noodles-gff", + "noodles-gtf", + "noodles-sam", + "noodles-tabix", + "noodles-vcf", +] + +[[package]] +name = "noodles-bam" +version = "0.72.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc74e9d2e9baefcbd8b945a6a729cfe31dfafb281121567493cc48d31bd35c59" +dependencies = [ + "bstr", + "byteorder", + "futures", + "indexmap", + "memchr", + "noodles-bgzf", + "noodles-core", + "noodles-csi", + "noodles-sam", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "noodles-bcf" +version = "0.66.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5eda01d014e50b5ef2624dbfa566bfab4dbc8a2726dad2c9aebd0caca70e0fb" +dependencies = [ + "byteorder", + 
"futures", + "indexmap", + "noodles-bgzf", + "noodles-core", + "noodles-csi", + "noodles-vcf", + "tokio", +] + +[[package]] +name = "noodles-bed" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13396d1aa63855efe5289b3711a191c007f08a89366ebe85acab2ae07883f8f7" +dependencies = [ + "bstr", + "lexical-core", + "memchr", + "noodles-core", +] + +[[package]] +name = "noodles-bgzf" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e624384981e5847bfd6a026f157c45d687187c30ee21b8c435310267c7aa7ab" +dependencies = [ + "byteorder", + "bytes", + "crossbeam-channel", + "flate2", + "futures", + "pin-project-lite", + "tokio", + "tokio-util", +] + +[[package]] +name = "noodles-core" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5a8c6b020d1205abef2b0fab4463a6c5ecc3c8f4d561ca8b0d1a42323376200" +dependencies = [ + "bstr", +] + +[[package]] +name = "noodles-cram" +version = "0.73.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40fe74bd6d21bbb1a470273f4bcc1fe61221d1a2ce5a775049d88754999f6591" +dependencies = [ + "async-compression", + "bitflags 2.7.0", + "bstr", + "byteorder", + "bytes", + "bzip2 0.5.0", + "flate2", + "futures", + "indexmap", + "md-5", + "noodles-bam", + "noodles-core", + "noodles-fasta", + "noodles-sam", + "pin-project-lite", + "tokio", + "xz2", +] + +[[package]] +name = "noodles-csi" +version = "0.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199113fe53fef2d79b0a9f670d1cad524b4ddcefdc1629dc69f0eb2707212c9e" +dependencies = [ + "bit-vec", + "bstr", + "byteorder", + "indexmap", + "noodles-bgzf", + "noodles-core", + "tokio", +] + +[[package]] +name = "noodles-fasta" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16862f9e1bf1ad825a1fab6fc29da9e950dd477cfcd0cb1a2b14fa8ee1a72575" +dependencies = [ + 
"bstr", + "bytes", + "memchr", + "noodles-bgzf", + "noodles-core", + "tokio", +] + +[[package]] +name = "noodles-fastq" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1606247d99eae65370cdb0ef5590f109a5286d57c06da8e738466cf95a4509d5" +dependencies = [ + "bstr", + "futures", + "memchr", + "tokio", +] + +[[package]] +name = "noodles-gff" +version = "0.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5d1b448c6cf133a71e6370499d875ff9ad4b1d0a502dbfd8b067a1f1153f67f" +dependencies = [ + "futures", + "indexmap", + "noodles-bgzf", + "noodles-core", + "noodles-csi", + "percent-encoding", + "tokio", +] + +[[package]] +name = "noodles-gtf" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe48bdf59c757e63369f0fd14a90c467352d97a6c6ddf749ace30fa5f61b4602" +dependencies = [ + "noodles-bgzf", + "noodles-core", + "noodles-csi", +] + +[[package]] +name = "noodles-sam" +version = "0.68.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "986c1894ed4407fb3bc00a153070da0ba9f97499ba74e6752ea0181c20f48991" +dependencies = [ + "bitflags 2.7.0", + "bstr", + "futures", + "indexmap", + "lexical-core", + "memchr", + "noodles-bgzf", + "noodles-core", + "noodles-csi", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "noodles-tabix" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fde991a31c6203845117944c1d5f697b69c382e37eb2d70f3e3f2b575fbca62d" +dependencies = [ + "byteorder", + "indexmap", + "noodles-bgzf", + "noodles-core", + "noodles-csi", + "tokio", +] + +[[package]] +name = "noodles-vcf" +version = "0.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ae18ab19252b5f8a4fe3310a0a1d2e2875a886e81a9e64aa69510a471655921" +dependencies = [ + "futures", + "indexmap", + "memchr", + "noodles-bgzf", + "noodles-core", + "noodles-csi", + 
"noodles-tabix", + "percent-encoding", + "pin-project-lite", + "tokio", +] + [[package]] name = "now" version = "0.1.3" @@ -2909,6 +4131,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num" version = "0.4.3" @@ -2942,6 +4174,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-integer" version = "0.1.46" @@ -3030,7 +4268,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4e89ad9e3d7d297152b17d39ed92cd50ca8063a89a9fa569046d41568891eff" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "block2", "libc", "objc2", @@ -3046,7 +4284,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "617fbf49e071c178c0b24c080767db52958f716d9eabdf0890523aeae54773ef" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "block2", "objc2", "objc2-foundation", @@ -3076,7 +4314,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ee638a5da3799329310ad4cfa62fbf045d5f56e3ef5ba4149e7452dcf89d5a8" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "block2", "libc", "objc2", @@ -3088,7 +4326,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0cba1276f6023976a406a14ffa85e1fdd19df6b0f737b063b95f6c8c7aadd6" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "block2", "objc2", "objc2-foundation", @@ -3100,7 +4338,7 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e42bee7bff906b14b167da2bac5efe6b6a07e6f7c0a21a7308d40c960242dc7a" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "block2", "objc2", "objc2-foundation", @@ -3123,14 +4361,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" dependencies = [ "async-trait", - "base64", + "base64 0.22.1", "bytes", "chrono", "futures", "httparse", "humantime", - "hyper", - "itertools", + "hyper 1.5.2", + "itertools 0.13.0", "md-5", "parking_lot", "percent-encoding", @@ -3138,7 +4376,7 @@ dependencies = [ "rand", "reqwest", "ring", - "rustls-pemfile", + "rustls-pemfile 2.2.0", "serde", "serde_json", "snafu", @@ -3173,10 +4411,22 @@ dependencies = [ name = "ordered-float" version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" -dependencies = [ - "num-traits", -] +checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" +dependencies = [ + "num-traits", +] + +[[package]] +name = "outref" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "parking_lot" @@ -3215,7 +4465,7 @@ dependencies = [ "arrow-ipc", "arrow-schema", "arrow-select", - "base64", + "base64 0.22.1", "brotli", "bytes", "chrono", @@ -3274,7 +4524,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ - "phf_shared", + "phf_shared 0.11.3", ] [[package]] @@ -3283,8 +4533,18 @@ version = "0.11.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.11.3", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand", ] [[package]] @@ -3293,17 +4553,46 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ - "phf_shared", + "phf_shared 0.11.3", "rand", ] +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher 0.3.11", +] + [[package]] name = "phf_shared" version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ - "siphasher", + "siphasher 1.0.1", +] + +[[package]] +name = "pin-project" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e2ec53ad785f4d35dac0adea7f7dc6f1bb277ad84a680c7afefeae05d1f5916" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d56a66c0c55993aa927429d0f8a0abfd74f084e4d9c192cffed01e418d83eefb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.96", ] [[package]] @@ -3350,7 +4639,7 @@ dependencies = [ "polars-sql", "polars-time", "polars-utils", - "version_check", + "version_check 0.9.5", ] [[package]] @@ -3383,7 +4672,7 @@ dependencies = [ "streaming-iterator", 
"strength_reduce", "strum_macros 0.26.4", - "version_check", + "version_check 0.9.5", "zstd 0.13.2", ] @@ -3415,7 +4704,7 @@ dependencies = [ "polars-utils", "ryu", "strength_reduce", - "version_check", + "version_check 0.9.5", ] [[package]] @@ -3424,7 +4713,7 @@ version = "0.45.1" source = "git+https://github.com/mwiewior/polars.git?rev=9d4fca54b1d71fce08a51cf00a88f67c67313706#9d4fca54b1d71fce08a51cf00a88f67c67313706" dependencies = [ "ahash", - "bitflags 2.6.0", + "bitflags 2.7.0", "bytemuck", "chrono", "chrono-tz", @@ -3451,7 +4740,7 @@ dependencies = [ "serde_json", "strum_macros 0.26.4", "thiserror 2.0.10", - "version_check", + "version_check 0.9.5", "xxhash-rust", ] @@ -3474,7 +4763,7 @@ version = "0.45.1" source = "git+https://github.com/mwiewior/polars.git?rev=9d4fca54b1d71fce08a51cf00a88f67c67313706#9d4fca54b1d71fce08a51cf00a88f67c67313706" dependencies = [ "ahash", - "bitflags 2.6.0", + "bitflags 2.7.0", "hashbrown 0.15.2", "num-traits", "once_cell", @@ -3577,7 +4866,7 @@ version = "0.45.1" source = "git+https://github.com/mwiewior/polars.git?rev=9d4fca54b1d71fce08a51cf00a88f67c67313706#9d4fca54b1d71fce08a51cf00a88f67c67313706" dependencies = [ "ahash", - "bitflags 2.6.0", + "bitflags 2.7.0", "futures", "memchr", "once_cell", @@ -3596,7 +4885,7 @@ dependencies = [ "pyo3", "rayon", "tokio", - "version_check", + "version_check 0.9.5", ] [[package]] @@ -3629,7 +4918,7 @@ dependencies = [ "ahash", "aho-corasick", "argminmax", - "base64", + "base64 0.22.1", "bytemuck", "chrono", "chrono-tz", @@ -3656,7 +4945,7 @@ dependencies = [ "serde_json", "strum_macros 0.26.4", "unicode-reverse", - "version_check", + "version_check 0.9.5", ] [[package]] @@ -3666,7 +4955,7 @@ source = "git+https://github.com/mwiewior/polars.git?rev=9d4fca54b1d71fce08a51cf dependencies = [ "ahash", "async-stream", - "base64", + "base64 0.22.1", "brotli", "bytemuck", "ethnum", @@ -3720,7 +5009,7 @@ dependencies = [ "rayon", "tokio", "uuid", - "version_check", + "version_check 0.9.5", ] 
[[package]] @@ -3729,7 +5018,7 @@ version = "0.45.1" source = "git+https://github.com/mwiewior/polars.git?rev=9d4fca54b1d71fce08a51cf00a88f67c67313706#9d4fca54b1d71fce08a51cf00a88f67c67313706" dependencies = [ "ahash", - "bitflags 2.6.0", + "bitflags 2.7.0", "bytemuck", "bytes", "chrono", @@ -3759,7 +5048,7 @@ dependencies = [ "regex", "serde", "strum_macros 0.26.4", - "version_check", + "version_check 0.9.5", ] [[package]] @@ -3796,7 +5085,7 @@ dependencies = [ "recursive", "serde_json", "thiserror 2.0.10", - "version_check", + "version_check 0.9.5", ] [[package]] @@ -3804,7 +5093,7 @@ name = "polars-row" version = "0.45.1" source = "git+https://github.com/mwiewior/polars.git?rev=9d4fca54b1d71fce08a51cf00a88f67c67313706#9d4fca54b1d71fce08a51cf00a88f67c67313706" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "bytemuck", "polars-arrow", "polars-compute", @@ -3821,7 +5110,7 @@ dependencies = [ "polars-error", "polars-utils", "serde", - "version_check", + "version_check 0.9.5", ] [[package]] @@ -3871,7 +5160,7 @@ dependencies = [ "recursive", "slotmap", "tokio", - "version_check", + "version_check 0.9.5", ] [[package]] @@ -3919,18 +5208,19 @@ dependencies = [ "serde", "stacker", "sysinfo", - "version_check", + "version_check 0.9.5", ] [[package]] name = "polars_bio" -version = "0.4.0" +version = "0.4.1" dependencies = [ "arrow", "arrow-array", "arrow-schema", "datafusion", "datafusion-python", + "exon", "futures-util", "log", "polars", @@ -3943,6 +5233,7 @@ dependencies = [ "pyo3-log", "sequila-core", "tokio", + "tracing", ] [[package]] @@ -3960,6 +5251,12 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.20" @@ -3969,6 +5266,36 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "precomputed-hash" +version = "0.1.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check 0.9.5", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check 0.9.5", +] + [[package]] name = "proc-macro2" version = "1.0.92" @@ -3995,10 +5322,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" dependencies = [ "anyhow", - "itertools", + "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -4079,7 +5406,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -4092,7 +5419,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -4109,6 +5436,7 @@ checksum = "165859e9e55f79d67b96c5d96f4e88b6f2695a1972849c15a6a3f5c59fc2c003" dependencies = [ "memchr", "serde", + "tokio", ] [[package]] @@ -4122,7 +5450,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash 2.1.0", - "rustls", + "rustls 0.23.21", "socket2", "thiserror 2.0.10", "tokio", @@ -4140,7 +5468,7 @@ dependencies = [ "rand", "ring", "rustc-hash 2.1.0", - "rustls", + "rustls 0.23.21", "rustls-pki-types", "slab", "thiserror 2.0.10", @@ -4218,7 +5546,7 @@ version = "11.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1ab240315c661615f2ee9f0f2cd32d5a7343a84d5ebcccb99d46e6637565e7b0" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", ] [[package]] @@ -4264,7 +5592,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -4273,7 +5601,7 @@ version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", ] [[package]] @@ -4293,7 +5621,7 @@ checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -4346,16 +5674,16 @@ version = "0.12.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-core", "futures-util", - "h2", - "http", - "http-body", + "h2 0.4.7", + "http 1.2.0", + "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-rustls", + "hyper 1.5.2", + "hyper-rustls 0.27.5", "hyper-util", "ipnet", "js-sys", @@ -4365,16 +5693,16 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls", - "rustls-native-certs", - "rustls-pemfile", + "rustls 0.23.21", + "rustls-native-certs 0.8.1", + "rustls-pemfile 2.2.0", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.1", "tokio-util", "tower", "tower-service", @@ -4458,7 +5786,7 @@ version = "0.38.43" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "errno", "libc", "linux-raw-sys", @@ -4467,18 +5795,42 @@ 
dependencies = [ [[package]] name = "rustls" -version = "0.23.20" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + +[[package]] +name = "rustls" +version = "0.23.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5065c3f250cbd332cd894be57c40fa52387247659b14a2d6041d121547903b1b" +checksum = "8f287924602bf649d949c63dc8ac8b235fa5387d394020705b80c4eb597ce5b8" dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.102.8", "subtle", "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile 1.0.4", + "schannel", + "security-framework 2.11.1", +] + [[package]] name = "rustls-native-certs" version = "0.8.1" @@ -4488,7 +5840,16 @@ dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework", + "security-framework 3.2.0", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", ] [[package]] @@ -4509,6 +5870,16 @@ dependencies = [ "web-time", ] +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.102.8" @@ -4571,14 +5942,37 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags 2.7.0", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + [[package]] name = "security-framework" version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" dependencies = [ - "bitflags 2.6.0", - "core-foundation", + "bitflags 2.7.0", + "core-foundation 0.10.0", "core-foundation-sys", "libc", "security-framework-sys", @@ -4643,6 +6037,15 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_bytes" +version = "0.11.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "387cc504cb06bb40a96c8e04e951fe01854cf6bc921053c954e4a606d9675c6a" +dependencies = [ + "serde", +] + [[package]] name = "serde_derive" version = "1.0.217" @@ -4651,7 +6054,7 @@ checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -4690,6 +6093,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -4741,6 +6153,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "siphasher" version = "1.0.1" @@ -4762,7 +6180,7 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbff4acf519f630b3a3ddcfaea6c06b42174d9a44bc70c620e9ed1649d58b82a" dependencies = [ - "version_check", + "version_check 0.9.5", ] [[package]] @@ -4789,7 +6207,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -4841,7 +6259,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -4902,6 +6320,38 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" +[[package]] +name = "string_cache" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared 0.10.0", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", + "proc-macro2", + "quote", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "strum" version = "0.25.0" @@ -4927,7 +6377,7 @@ dependencies = [ 
"proc-macro2", "quote", "rustversion", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -4940,7 +6390,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -4962,9 +6412,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.95" +version = "2.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" +checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" dependencies = [ "proc-macro2", "quote", @@ -4980,6 +6430,18 @@ dependencies = [ "futures-core", ] +[[package]] +name = "synstructure" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", + "unicode-xid", +] + [[package]] name = "synstructure" version = "0.13.1" @@ -4988,7 +6450,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -5056,7 +6518,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -5067,7 +6529,17 @@ checksum = "9e9465d30713b56a37ede7185763c3492a91be2f5fa68d958c44e41ab9248beb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", +] + +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", ] [[package]] @@ -5081,6 +6553,36 @@ dependencies = [ "ordered-float 2.10.1", ] +[[package]] +name = "time" +version = "0.3.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" +dependencies = [ + "deranged", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tiny-keccak" version = "2.0.2" @@ -5142,7 +6644,17 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", +] + +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", ] [[package]] @@ -5151,7 +6663,7 @@ version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" dependencies = [ - "rustls", + "rustls 0.23.21", "tokio", ] @@ -5163,6 +6675,7 @@ checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" dependencies = [ "bytes", "futures-core", + "futures-io", "futures-sink", "pin-project-lite", "tokio", @@ -5201,6 +6714,7 @@ version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -5214,7 +6728,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 
2.0.96", ] [[package]] @@ -5224,6 +6738,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", ] [[package]] @@ -5286,7 +6826,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -5295,6 +6835,33 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +[[package]] +name = "ufmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a64846ec02b57e9108d6469d98d1648782ad6bb150a95a9baac26900bbeab9d" +dependencies = [ + "ufmt-macros", + "ufmt-write", +] + +[[package]] +name = "ufmt-macros" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d337d3be617449165cb4633c8dece429afd83f84051024079f97ad32a9663716" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "ufmt-write" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e87a2ed6b42ec5e28cc3b94c09982969e9227600b2e3dcbc1db927a84c06bd69" + [[package]] name = "unicode-ident" version = "1.0.14" @@ -5322,6 
+6889,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "unindent" version = "0.2.3" @@ -5345,6 +6918,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf16_iter" version = "1.0.5" @@ -5357,16 +6936,28 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "uuid" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "b913a3b5fe84142e269d63cc62b64319ccaf89b748fc31fe025177f767a756c4" dependencies = [ "getrandom", "serde", ] +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "value-trait" version = "0.10.1" @@ -5388,12 +6979,24 @@ dependencies = [ "serde", ] +[[package]] +name = "version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" + [[package]] name = "version_check" version = 
"0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "walkdir" version = "2.5.0" @@ -5440,7 +7043,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", "wasm-bindgen-shared", ] @@ -5475,7 +7078,7 @@ checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -5519,6 +7122,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "wide" version = "0.7.32" @@ -5599,7 +7212,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -5610,7 +7223,7 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -5820,6 +7433,12 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec107c4503ea0b4a98ef47356329af139c0a4f7750e621cf2973cd3385ebcb3d" +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + [[package]] name = "xxhash-rust" version = "0.8.15" @@ -5855,8 +7474,8 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" 
dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", - "synstructure", + "syn 2.0.96", + "synstructure 0.13.1", ] [[package]] @@ -5877,7 +7496,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] @@ -5897,8 +7516,8 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", - "synstructure", + "syn 2.0.96", + "synstructure 0.13.1", ] [[package]] @@ -5926,7 +7545,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.95", + "syn 2.0.96", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 2715613..09f2240 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "polars_bio" -version = "0.4.0" +version = "0.4.1" edition = "2021" [lib] @@ -12,15 +12,17 @@ crate-type= ["cdylib"] [dependencies] datafusion-python = { git = "https://github.com/apache/datafusion-python.git", rev = "5c834934dec89bd96ff70df3b278e9d6fe78f7ec"} -pyo3 = { version = "0.22.4", features = ["extension-module", "abi3-py38", "experimental-async"] } +pyo3 = { version = "0.22"} pyo3-log = "0.11.0" sequila-core = { git = "https://github.com/biodatageeks/sequila-native.git", rev = "07a36935177f8ffbfbaa7f63958384108efd7b4f" } + datafusion = { version = "43.0.0"} arrow = "53.3.0" arrow-schema = "53.3.0" arrow-array = { version = "53.3.0", features = ["ffi"] } tokio = {version = "1.42.0", features = ["full", "tracing"]} log = "0.4.22" +tracing = { version = "0.1.41", features = ["log"] } futures-util = "0.3.31" @@ -30,4 +32,7 @@ polars-plan = { git = "https://github.com/mwiewior/polars.git" , rev = "9d4fca54 polars-lazy = { git = "https://github.com/mwiewior/polars.git" , rev = "9d4fca54b1d71fce08a51cf00a88f67c67313706", features = ["parquet", "new_streaming", "streaming", "csv", "cse"]} polars-core = {git = 
"https://github.com/mwiewior/polars.git" , rev = "9d4fca54b1d71fce08a51cf00a88f67c67313706"} polars-arrow = { git = "https://github.com/mwiewior/polars.git" , rev = "9d4fca54b1d71fce08a51cf00a88f67c67313706"} -polars-python = { git = "https://github.com/mwiewior/polars.git" , rev = "9d4fca54b1d71fce08a51cf00a88f67c67313706"} \ No newline at end of file +polars-python = { git = "https://github.com/mwiewior/polars.git" , rev = "9d4fca54b1d71fce08a51cf00a88f67c67313706"} + +#exon ="0.32.4" +exon = { git = "https://github.com/mwiewior/exon.git", rev="c543c03937ce5c8f249a77e45a28d7138e0a9c0f"} \ No newline at end of file diff --git a/README.md b/README.md index 3255092..3cef5ca 100644 --- a/README.md +++ b/README.md @@ -14,10 +14,10 @@ It provides a DataFrame API for genomics data and is designed to be blazing fast, memory efficient and easy to use. ## Key Features -* optimized for [peformance](performance.md#results-summary-) and large-scale genomics datasets -* popular genomics [operations](features.md#genomic-ranges-operations) with a DataFrame API (both [Pandas](https://pandas.pydata.org/) and [polars](https://pola.rs/)) +* optimized for [performance](docs/performance.md#results-summary-) and large-scale genomics datasets +* popular genomics [operations](docs/features.md#genomic-ranges-operations) with a DataFrame API (both [Pandas](https://pandas.pydata.org/) and [polars](https://pola.rs/)) * native parallel engine powered by Apache DataFusion and [sequila-native](https://github.com/biodatageeks/sequila-native) -* [out-of-core](features.md#streaming-out-of-core-processing) processing (for data too large to fit into a computer's main memory) with [Apache DataFusion](https://datafusion.apache.org/) and [polars](https://pola.rs/) +* [out-of-core](docs/features.md#streaming-out-of-core-processing-exeprimental) processing (for data too large to fit into a computer's main memory) with [Apache DataFusion](https://datafusion.apache.org/) and [polars](https://pola.rs/) * 
zero-copy data exchange with [Apache Arrow](https://arrow.apache.org/) * pre-built wheel packages for *Linux*, *Windows* and *MacOS* (*arm64* and *x86_64*) available on [PyPI](https://pypi.org/project/polars-bio/#files) diff --git a/benchmark/src/bench_overlap.py b/benchmark/src/bench_overlap.py index 009d158..1acea24 100755 --- a/benchmark/src/bench_overlap.py +++ b/benchmark/src/bench_overlap.py @@ -267,10 +267,10 @@ def genomicranges(df_1, df_2): # Display the table benchmark_results = { - "inputs": { - "df_1_num": len(df_1), - "df_2_num": len(df_2), - }, + # "inputs": { + # "df_1_num": len(df_1), + # "df_2_num": len(df_2), + # }, # "output_num": pb.overlap(df_1, df_2, col1=columns, col2=columns) # .collect() # .count(), diff --git a/docs/cookbook.md b/docs/cookbook.md index a0c6a88..e707ceb 100644 --- a/docs/cookbook.md +++ b/docs/cookbook.md @@ -1,2 +1,14 @@ :construction: +[//]: # (## Genomic ranges operations) + +[//]: # (## How to read bioinformatics formats) + + +[//]: # () +[//]: # (## How to work directly with Datafusion DataFrame) + +[//]: # (To bypass issue XXX) + +[//]: # (## How to set logging level) + diff --git a/mkdocs.yml b/mkdocs.yml index 2e0ea24..cb3790d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -11,13 +11,13 @@ nav: - 📚 Tutorial: notebooks/tutorial.ipynb - 🚀 Performance: performance.md - ⚙️ API reference: api.md + + - Quick start: quickstart.md - Features: features.md - Cookbook: cookbook.md - Tutorial: notebooks/tutorial.ipynb - Performance: performance.md - - API reference: api.md - - FAQ: faq.md plugins: - search diff --git a/poetry.lock b/poetry.lock index 18a9e01..75ccf80 100644 --- a/poetry.lock +++ b/poetry.lock @@ -192,6 +192,42 @@ charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] +[[package]] +name = "biocframe" +version = "0.6.2" +description = "Flexible dataframe representation to support nested structures." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "BiocFrame-0.6.2-py3-none-any.whl", hash = "sha256:8fd427d45334b696661350a9aa490335c017158f8d270fcc97796d6fd40a37ee"}, + {file = "biocframe-0.6.2.tar.gz", hash = "sha256:0eb53268e010fbdd881ecf8c67cdfac8116cb205003f9609fe57790eb1eb64b5"}, +] + +[package.dependencies] +biocutils = ">=0.1.4" +numpy = "*" + +[package.extras] +optional = ["pandas", "polars"] +testing = ["pandas", "polars", "pytest", "pytest-cov", "setuptools"] + +[[package]] +name = "biocutils" +version = "0.2.1" +description = "Utilities to use across the biocpy packages." +optional = false +python-versions = ">=3.9" +files = [ + {file = "biocutils-0.2.1-py3-none-any.whl", hash = "sha256:3f09ed57a79e73c698f317e64c96386aee7d699046d6477987e2578af1836b6d"}, + {file = "biocutils-0.2.1.tar.gz", hash = "sha256:cec3c1c0166eea5a652da75ba9403e9fd270c297394f2745775c2b9c66085318"}, +] + +[package.dependencies] +numpy = "*" + +[package.extras] +testing = ["pandas", "pytest", "pytest-cov", "scipy", "setuptools"] + [[package]] name = "bioframe" version = "0.7.2" @@ -1079,6 +1115,27 @@ files = [ {file = "fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f"}, ] +[[package]] +name = "genomicranges" +version = "0.5.2" +description = "Container class to represent and operate over genomic regions and annotations." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "GenomicRanges-0.5.2-py3-none-any.whl", hash = "sha256:d89cd90f060205b111258bdec1a3bd58d664539f0022c988dd785f4639cf3ee3"}, + {file = "genomicranges-0.5.2.tar.gz", hash = "sha256:8d3293447ace5e9bbe6bf18a8df1950359c9a4dd06471e72bec73c4607bfccb1"}, +] + +[package.dependencies] +biocframe = ">=0.5.11" +biocutils = ">=0.1.3" +iranges = {version = ">=0.2.12,<0.4.0", extras = ["optional"]} +numpy = "*" + +[package.extras] +optional = ["biobear", "joblib", "matplotlib", "pandas", "polars"] +testing = ["biobear", "joblib", "matplotlib", "pandas", "polars", "pytest", "pytest-cov", "rich", "seaborn", "setuptools"] + [[package]] name = "ghp-import" version = "2.1.0" @@ -1341,6 +1398,29 @@ widgetsnbextension = ">=4.0.12,<4.1.0" [package.extras] test = ["ipykernel", "jsonschema", "pytest (>=3.6.0)", "pytest-cov", "pytz"] +[[package]] +name = "iranges" +version = "0.3.0" +description = "Python implementation of the [**IRanges**](https://bioconductor.org/packages/IRanges) Bioconductor package." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "IRanges-0.3.0-py3-none-any.whl", hash = "sha256:ab2c0cfc84e40defc391b9cd2cdcfa3ef916f67d0dca06d9e9bc326d300b2de5"}, + {file = "iranges-0.3.0.tar.gz", hash = "sha256:096399c9f4d3edc4f0660937d11b0be9d2c5a941d8e56cbd502aa90e4746ae18"}, +] + +[package.dependencies] +biocframe = ">=0.5.11" +biocutils = ">=0.1.4" +ncls = "0.0.68" +numpy = "*" +pandas = {version = "*", optional = true, markers = "extra == \"optional\""} +polars = {version = "*", optional = true, markers = "extra == \"optional\""} + +[package.extras] +optional = ["numpy", "pandas", "polars"] +testing = ["biocframe", "biocutils", "ncls (==0.0.68)", "numpy", "pandas", "polars", "pytest", "pytest-cov", "setuptools"] + [[package]] name = "isoduration" version = "20.11.0" @@ -3240,6 +3320,20 @@ files = [ [package.extras] test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"] +[[package]] +name = "pybedtools" +version = "0.10.0" +description = "Wrapper around BEDTools for bioinformatics work" +optional = false +python-versions = "*" +files = [ + {file = "pybedtools-0.10.0.tar.gz", hash = "sha256:1a6fbaad23b013becc741d7d5922a2df03e391bc44ff92772ffb7dd456711161"}, +] + +[package.dependencies] +numpy = "*" +pysam = "*" + [[package]] name = "pycparser" version = "2.22" @@ -3266,6 +3360,21 @@ files = [ doc = ["sphinx", "sphinx_rtd_theme"] test = ["pillow", "pytest", "ruff"] +[[package]] +name = "pygenomics" +version = "0.1.1" +description = "Manipulating genomic intervals and data files in Python" +optional = false +python-versions = "^3.7.2" +files = [] +develop = false + +[package.source] +type = "git" +url = "https://gitlab.com/gtamazian/pygenomics.git" +reference = "0.1.1" +resolved_reference = "44289a1685bffcdb7d09058ddf26cbb288477962" + [[package]] name = "pygments" version = "2.19.1" @@ -3352,6 +3461,42 @@ url = "https://github.com/pyranges/pyranges.git" reference = "4f0a153336e7153cdfea15b141ce4ea35a24e233" resolved_reference = 
"4f0a153336e7153cdfea15b141ce4ea35a24e233" +[[package]] +name = "pysam" +version = "0.22.1" +description = "Package for reading, manipulating, and writing genomic data" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pysam-0.22.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f18e72013ef2db9a9bb7e8ac421934d054427f6c03e66ce8abc39b09c846ba72"}, + {file = "pysam-0.22.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:79cd94eeb96541385fa99e759a8f83d21428e092c8b577d50b4eee5823e757cd"}, + {file = "pysam-0.22.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c71ea45461ee596949061f321a799a97c418164485fdd7e8db89aea2ff979092"}, + {file = "pysam-0.22.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:ab3343f221994d163e1ba2691430ce0f6e7da13762473e0d7f9a2d5db3bec235"}, + {file = "pysam-0.22.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:503c833e6cf348d87aec9113b1386d5c85c031d64deb914c29f5ad1792d103e6"}, + {file = "pysam-0.22.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4447fdc2630519a00b6bf598995f1440e6f398eb0c084a7c141db026990ae07a"}, + {file = "pysam-0.22.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:1be663a73cf56ddd1d309b91d314a0c94c9bf352eaa3c6eda30cef12699843f0"}, + {file = "pysam-0.22.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:aeb31472365014fd8b37da4a88af758094b5872a8a16a25635a52cf8ceff5a9f"}, + {file = "pysam-0.22.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e72e129d245574801125029a5892c9e18d2956b13c4203ea585cbd64ccde9351"}, + {file = "pysam-0.22.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f8f00bb1fb977fc33c87cf5fe9023eefc2ba3d43d30ab4875a1765827018c949"}, + {file = "pysam-0.22.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c0e051fda433c1c7ff94532f60477bb83b97f4bb183567a0ae23f340e1c200b4"}, + {file = "pysam-0.22.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:860c7c78ddb1539b83d5476502ba14c8b4e8435810dc7a5b715196da3dfb86b6"}, + {file = 
"pysam-0.22.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:18d886d50d75d8f853057fbbb284f0f0e98afad1f76b1a6f55660ea167d31c17"}, + {file = "pysam-0.22.1-cp36-cp36m-manylinux_2_28_aarch64.whl", hash = "sha256:44420290a619c02da48ca0956548eb82a1665ae97b6ee69c094f9da5a6206431"}, + {file = "pysam-0.22.1-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:acff506c921af36f364c5a87f3a30b3c105ebeb270d0e821c2ca571eaf60ca20"}, + {file = "pysam-0.22.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:098e0bf12d8b0399613065843310c91ba31a02d014b1f6b4e9d7f2d0d1254ff8"}, + {file = "pysam-0.22.1-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:cd9d457063272df16136640515183ea501bf3371f140a134b2f0a42f425a37d9"}, + {file = "pysam-0.22.1-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:af9fb53157ba2431b7b20a550c0223f4a039304c9f180d8da98ea9d2d3ef3fbf"}, + {file = "pysam-0.22.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d3fd6fe5aca79933632f38e5b568ce8d4e67e5c4f3bd39bff55fd9646af814d2"}, + {file = "pysam-0.22.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b6cf1871c99cfc9c01261ec5f628519c2c889f0ff070e7a26aa5adbf9f69af1"}, + {file = "pysam-0.22.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:b1addca11c5cfceefaebdfcf3d83bc42f4b89fb1e8ae645a4bdab971cbcd2bc0"}, + {file = "pysam-0.22.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:17fac22fc89c86241a71084ca097878c61c97f6ff5fd4535d718681a849852a7"}, + {file = "pysam-0.22.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4aff9b41856d5dba6585ffd60884b8f3778c5d2688f33989662aabe7f4cd0fe0"}, + {file = "pysam-0.22.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:faa5298291b54f185c7b8f84510224918bddc64bbdcb2e8426ff43e83452310f"}, + {file = "pysam-0.22.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:4dfae1de006d1c6491a59b00052a3f67c53a136165cf4edd7789b5dcb1e6806f"}, + {file = "pysam-0.22.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = 
"sha256:78ed746a39c9cebe489b8f0f86cf23c09c942e76c901260fb2794906e4cd0e26"}, + {file = "pysam-0.22.1.tar.gz", hash = "sha256:18a0b97be95bd71e584de698441c46651cdff378db1c9a4fb3f541e560253b22"}, +] + [[package]] name = "pytest" version = "8.3.4" @@ -4623,4 +4768,4 @@ test = ["pytest"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "478004f8bdf6f7d9641e95558dcc67a10470af2e338286147745ccecd49be519" +content-hash = "190ff972e1e01c0d75d4c49cb5f18a768bd129e1e3a56822029b7db0ef47d00f" diff --git a/polars_bio/__init__.py b/polars_bio/__init__.py index 724c1a2..f9e9a18 100644 --- a/polars_bio/__init__.py +++ b/polars_bio/__init__.py @@ -1,11 +1,45 @@ import logging -from .range_op import FilterOp, ctx, nearest, overlap +from polars_bio.polars_bio import InputFormat + +from .context import ctx +from .io import ( + read_bam, + read_bed, + read_cram, + read_fasta, + read_fastq, + read_gff, + read_gtf, + read_indexed_bam, + read_indexed_vcf, + read_vcf, +) +from .range_op import FilterOp, nearest, overlap +from .range_viz import visualize_intervals logging.basicConfig() logging.getLogger().setLevel(logging.WARN) logger = logging.getLogger("polars_bio") logger.setLevel(logging.INFO) -__version__ = "0.4.0" -__all__ = ["overlap", "nearest", "ctx", "FilterOp", "vizualize_intervals"] + +__version__ = "0.4.1" +__all__ = [ + "overlap", + "nearest", + "ctx", + "FilterOp", + "visualize_intervals", + "read_bam", + "read_indexed_bam", + "read_vcf", + "read_cram", + "read_bed", + "read_gff", + "read_gtf", + "read_fasta", + "read_fastq", + "read_indexed_vcf", + "InputFormat", +] diff --git a/polars_bio/context.py b/polars_bio/context.py new file mode 100644 index 0000000..1ec74ed --- /dev/null +++ b/polars_bio/context.py @@ -0,0 +1,27 @@ +from polars_bio.polars_bio import BioSessionContext + + +def singleton(cls): + """Decorator to make a class a singleton.""" + instances = {} + + def get_instance(*args, **kwargs): + if cls not in instances: + instances[cls] = 
cls(*args, **kwargs) + return instances[cls] + + return get_instance + + +@singleton +class Context: + def __init__(self): + self.ctx = BioSessionContext() + self.ctx.set_option("datafusion.execution.target_partitions", "1") + self.ctx.set_option("sequila.interval_join_algorithm", "coitrees") + + def set_option(self, key, value): + self.ctx.set_option(key, value) + + +ctx = Context().ctx diff --git a/polars_bio/io.py b/polars_bio/io.py new file mode 100644 index 0000000..41f8c4f --- /dev/null +++ b/polars_bio/io.py @@ -0,0 +1,178 @@ +from typing import Iterator, Union + +import polars as pl +from datafusion import DataFrame +from polars.io.plugins import register_io_source + +from polars_bio.polars_bio import InputFormat, py_register_table, py_scan_table + +from .context import ctx + + +def read_bam(path: str) -> pl.LazyFrame: + """ + Read a BAM file into a LazyFrame. + + Parameters: + path: The path to the BAM file. + """ + return file_lazy_scan(path, InputFormat.Bam) + + +def read_cram(path: str) -> pl.LazyFrame: + """ + Read a CRAM file into a LazyFrame. + + Parameters: + path: The path to the CRAM file. + """ + return file_lazy_scan(path, InputFormat.Cram) + + +def read_indexed_bam(path: str) -> pl.LazyFrame: + """ + Read an indexed BAM file into a LazyFrame. + + Parameters: + path: The path to the BAM file. + + !!! warning + Predicate pushdown is not supported yet. So no real benefit from using an indexed BAM file. + """ + return file_lazy_scan(path, InputFormat.IndexedBam) + + +def read_vcf(path: str) -> pl.LazyFrame: + """ + Read a VCF file into a LazyFrame. + + Parameters: + Parameters: + path: The path to the VCF file. + """ + return file_lazy_scan(path, InputFormat.Vcf) + + +def read_bed(path: str) -> pl.LazyFrame: + """ + Read a BED file into a LazyFrame. + + Parameters: + Parameters: + path: The path to the BED file. 
+ """ + return file_lazy_scan(path, InputFormat.Bed) + + +def read_gff(path: str) -> pl.LazyFrame: + """ + Read a GFF file into a LazyFrame. + + Parameters: + Parameters: + path: The path to the GFF file. + """ + return file_lazy_scan(path, InputFormat.Gff) + + +def read_gtf(path: str) -> pl.LazyFrame: + """ + Read a GTF file into a LazyFrame. + + Parameters: + Parameters: + path: The path to the GTF file. + """ + return file_lazy_scan(path, InputFormat.Gtf) + + +def read_fasta(path: str) -> pl.LazyFrame: + """ + Read a FASTA file into a LazyFrame. + + Parameters: + Parameters: + path: The path to the FASTA file. + """ + return file_lazy_scan(path, InputFormat.Fasta) + + +def read_fastq(path: str) -> pl.LazyFrame: + """ + Read a FASTQ file into a LazyFrame. + + Parameters: + Parameters: + path: The path to the FASTQ file. + """ + return file_lazy_scan(path, InputFormat.Fastq) + + +def read_indexed_vcf(path: str) -> pl.LazyFrame: + """ + Read an indexed VCF file into a LazyFrame. + + Parameters: + Parameters: + path: The path to the VCF file. + + !!! warning + Predicate pushdown is not supported yet. So no real benefit from using an indexed VCF file. + """ + return file_lazy_scan(path, InputFormat.Vcf) + + +def file_lazy_scan(path: str, input_format: InputFormat) -> pl.LazyFrame: + df_lazy: DataFrame = read_file(path, input_format) + arrow_schema = df_lazy.schema() + + def _overlap_source( + with_columns: Union[pl.Expr, None], + predicate: Union[pl.Expr, None], + n_rows: Union[int, None], + _batch_size: Union[int, None], + ) -> Iterator[pl.DataFrame]: + if n_rows and n_rows < 8192: # 8192 is the default batch size in datafusion + df = df_lazy.execute_stream().next().to_pyarrow() + df = pl.DataFrame(df).limit(n_rows) + if predicate is not None: + df = df.filter(predicate) + # TODO: We can push columns down to the DataFusion plan in the future, + # but for now we'll do it here. 
+ if with_columns is not None: + df = df.select(with_columns) + yield df + return + df_stream = df_lazy.execute_stream() + for r in df_stream: + py_df = r.to_pyarrow() + df = pl.DataFrame(py_df) + if predicate is not None: + df = df.filter(predicate) + # TODO: We can push columns down to the DataFusion plan in the future, + # but for now we'll do it here. + if with_columns is not None: + df = df.select(with_columns) + yield df + + return register_io_source(_overlap_source, schema=arrow_schema) + + +def read_file(path: str, input_format: InputFormat) -> pl.DataFrame: + """ + Read a file into a DataFrame. + + Parameters + ---------- + path : str + The path to the file. + input_format : InputFormat + The input format of the file. + + Returns + ------- + pl.DataFrame + The DataFrame. + """ + table = py_register_table(ctx, path, input_format) + return py_scan_table(ctx, table.name) diff --git a/polars_bio/range_op.py b/polars_bio/range_op.py index 6135a1a..d25d8a7 100644 --- a/polars_bio/range_op.py +++ b/polars_bio/range_op.py @@ -4,7 +4,8 @@ import polars as pl from typing_extensions import TYPE_CHECKING, Union -from .range_op_helpers import Context, _validate_overlap_input, range_operation +from .context import ctx +from .range_op_helpers import _validate_overlap_input, range_operation if TYPE_CHECKING: pass @@ -12,8 +13,6 @@ DEFAULT_INTERVAL_COLUMNS = ["chrom", "start", "end"] -ctx = Context().ctx - def overlap( df1: Union[str, pl.DataFrame, pl.LazyFrame, pd.DataFrame], diff --git a/polars_bio/range_op_helpers.py b/polars_bio/range_op_helpers.py index b7eb979..5ba268b 100644 --- a/polars_bio/range_op_helpers.py +++ b/polars_bio/range_op_helpers.py @@ -15,29 +15,6 @@ from .range_op_io import _df_to_arrow, _get_schema, _rename_columns, range_lazy_scan -def singleton(cls): - """Decorator to make a class a singleton.""" - instances = {} - - def get_instance(*args, **kwargs): - if cls not in instances: - instances[cls] = cls(*args, **kwargs) - return instances[cls] - - 
return get_instance - - -@singleton -class Context: - def __init__(self): - self.ctx = BioSessionContext() - self.ctx.set_option("datafusion.execution.target_partitions", "1") - self.ctx.set_option("sequila.interval_join_algorithm", "coitrees") - - def set_option(self, key, value): - self.ctx.set_option(key, value) - - def range_operation( df1: Union[str, pl.DataFrame, pl.LazyFrame, pd.DataFrame], df2: Union[str, pl.DataFrame, pl.LazyFrame, pd.DataFrame], diff --git a/polars_bio/range_op_io.py b/polars_bio/range_op_io.py index b8ca7fd..5420b0f 100644 --- a/polars_bio/range_op_io.py +++ b/polars_bio/range_op_io.py @@ -48,18 +48,19 @@ def _overlap_source( _batch_size: Union[int, None], ) -> Iterator[pl.DataFrame]: df_lazy: datafusion.DataFrame = range_function(ctx, df_1, df_2, range_options) + df_lazy.schema() df_stream = df_lazy.execute_stream() for r in df_stream: py_df = r.to_pyarrow() df = pl.DataFrame(py_df) - # TODO: We can push predicates down to the DataFusion plan in the future, - # but for now we'll do it here. - if predicate is not None: - df = df.filter(predicate) - # TODO: We can push columns down to the DataFusion plan in the future, - # but for now we'll do it here. - if with_columns is not None: - df = df.select(with_columns) + # # TODO: We can push predicates down to the DataFusion plan in the future, + # # but for now we'll do it here. + # if predicate is not None: + # df = df.filter(predicate) + # # TODO: We can push columns down to the DataFusion plan in the future, + # # but for now we'll do it here. 
+ # if with_columns is not None: + # df = df.select(with_columns) yield df return register_io_source(_overlap_source, schema=schema) diff --git a/pyproject.toml b/pyproject.toml index 967267f..e1fc93e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,7 @@ rich = "^13.9.4" ## only for benchmarking pyranges = {git = "https://github.com/pyranges/pyranges.git", rev = "4f0a153336e7153cdfea15b141ce4ea35a24e233" } -#GenomicRanges = "^0.5.0" +GenomicRanges = "^0.5.0" #pyranges1 = { git = "https://github.com/mwiewior/pyranges1.git", rev = "949d7c15c1c2e217f4404415f79b386f326b6f8d"} -#pybedtools = "^0.10.0" -#pygenomics = { git = "https://gitlab.com/gtamazian/pygenomics.git", rev = "0.1.1"} \ No newline at end of file +pybedtools = "^0.10.0" +pygenomics = { git = "https://gitlab.com/gtamazian/pygenomics.git", rev = "0.1.1"} \ No newline at end of file diff --git a/src/context.rs b/src/context.rs index 776e377..16e046d 100644 --- a/src/context.rs +++ b/src/context.rs @@ -1,15 +1,17 @@ use std::collections::HashMap; use datafusion::config::ConfigOptions; -use datafusion::prelude::{SessionConfig, SessionContext}; +use datafusion::prelude::SessionConfig; +use exon::config::ExonConfigExtension; +use exon::ExonSession; use log::debug; use pyo3::{pyclass, pymethods, PyResult}; -use sequila_core::session_context::{SeQuiLaSessionExt, SequilaConfig}; +use sequila_core::session_context::SequilaConfig; #[pyclass(name = "BioSessionContext")] -#[derive(Clone)] +// #[derive(Clone)] pub struct PyBioSessionContext { - pub ctx: SessionContext, + pub ctx: ExonSession, pub session_config: HashMap, } @@ -18,8 +20,9 @@ impl PyBioSessionContext { #[pyo3(signature = ())] #[new] pub fn new() -> PyResult { - let ctx = create_context(); + let ctx = create_context().unwrap(); let session_config: HashMap = HashMap::new(); + Ok(PyBioSessionContext { ctx, session_config, @@ -43,8 +46,8 @@ impl PyBioSessionContext { } } -pub fn set_option_internal(ctx: &SessionContext, key: &str, value: &str) { 
- let state = ctx.state_ref(); +pub fn set_option_internal(ctx: &ExonSession, key: &str, value: &str) { + let state = ctx.session.state_ref(); state .write() .config_mut() @@ -53,8 +56,9 @@ pub fn set_option_internal(ctx: &SessionContext, key: &str, value: &str) { .unwrap(); } -fn create_context() -> SessionContext { +fn create_context() -> exon::Result { let mut options = ConfigOptions::new(); + options.extensions.insert(ExonConfigExtension::default()); let tuning_options = vec![ ("datafusion.optimizer.repartition_joins", "false"), ("datafusion.execution.coalesce_batches", "false"), @@ -71,5 +75,5 @@ fn create_context() -> SessionContext { .with_option_extension(sequila_config) .with_information_schema(true); - SessionContext::new_with_sequila(config) + ExonSession::with_config_exon(config) } diff --git a/src/lib.rs b/src/lib.rs index 54e6f66..ca42058 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,7 +12,7 @@ use std::sync::{Arc, Mutex}; use datafusion::arrow::ffi_stream::ArrowArrayStreamReader; use datafusion::arrow::pyarrow::PyArrowType; use datafusion_python::dataframe::PyDataFrame; -use log::debug; +use log::{debug, error, info}; use polars_lazy::prelude::{LazyFrame, ScanArgsAnonymous}; use polars_python::error::PyPolarsErr; use polars_python::lazyframe::PyLazyFrame; @@ -21,7 +21,7 @@ use tokio::runtime::Runtime; use crate::context::PyBioSessionContext; use crate::operation::do_range_operation; -use crate::option::{FilterOp, RangeOp, RangeOptions}; +use crate::option::{BioTable, FilterOp, InputFormat, RangeOp, RangeOptions}; use crate::scan::{get_input_format, register_frame, register_table}; use crate::streaming::RangeOperationScan; use crate::utils::convert_arrow_rb_schema_to_polars_df_schema; @@ -31,7 +31,6 @@ const RIGHT_TABLE: &str = "s2"; const DEFAULT_COLUMN_NAMES: [&str; 3] = ["contig", "start", "end"]; #[pyfunction] - fn range_operation_frame( py_ctx: &PyBioSessionContext, df1: PyArrowType, @@ -51,7 +50,6 @@ fn range_operation_frame( } #[pyfunction] - 
fn range_operation_scan( py_ctx: &PyBioSessionContext, df_path1: String, @@ -81,7 +79,6 @@ fn range_operation_scan( } #[pyfunction] - fn stream_range_operation_scan( py: Python<'_>, py_ctx: &PyBioSessionContext, @@ -118,7 +115,12 @@ fn stream_range_operation_scan( }; debug!( "{}", - ctx.state().config().options().execution.target_partitions + ctx.session + .state() + .config() + .options() + .execution + .target_partitions ); let stream = rt.block_on(df.execute_stream()).unwrap(); let scan = RangeOperationScan { @@ -130,15 +132,80 @@ fn stream_range_operation_scan( }) } +#[pyfunction] +fn py_register_table( + py: Python<'_>, + py_ctx: &PyBioSessionContext, + path: String, + input_format: InputFormat, +) -> PyResult> { + #[allow(clippy::useless_conversion)] + py.allow_threads(|| { + let rt = Runtime::new().unwrap(); + let ctx = &py_ctx.ctx; + let table_name = path + .to_lowercase() + .split('/') + .last() + .unwrap() + .to_string() + .replace(&format!(".{}", input_format).to_string().to_lowercase(), "") + .replace(".", "_"); + rt.block_on(register_table( + ctx, + &path, + &*table_name, + input_format.clone(), + )); + match rt.block_on(ctx.session.table(&table_name)) { + Ok(table) => { + let schema = table.schema().as_arrow(); + info!("Table: {} registered for path: {}", table_name, path); + let bio_table = BioTable { + name: table_name, + format: input_format, + path, + }; + debug!("Schema: {:?}", schema); + Ok(Some(bio_table)) + }, + Err(e) => { + error!("{:?}", e); + Ok(None) + }, + } + }) +} + +#[pyfunction] +fn py_scan_table( + py: Python<'_>, + py_ctx: &PyBioSessionContext, + table_name: String, +) -> PyResult { + #[allow(clippy::useless_conversion)] + py.allow_threads(|| { + let rt = Runtime::new().unwrap(); + let ctx = &py_ctx.ctx; + let df = rt + .block_on(ctx.sql(&format!("SELECT * FROM {}", table_name))) + .unwrap(); + Ok(PyDataFrame::new(df)) + }) +} + #[pymodule] fn polars_bio(_py: Python, m: &Bound) -> PyResult<()> { pyo3_log::init(); 
m.add_function(wrap_pyfunction!(range_operation_frame, m)?)?; m.add_function(wrap_pyfunction!(range_operation_scan, m)?)?; m.add_function(wrap_pyfunction!(stream_range_operation_scan, m)?)?; + m.add_function(wrap_pyfunction!(py_scan_table, m)?)?; + m.add_function(wrap_pyfunction!(py_register_table, m)?)?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/src/operation.rs b/src/operation.rs index 83ea667..2ccdd54 100644 --- a/src/operation.rs +++ b/src/operation.rs @@ -1,4 +1,4 @@ -use datafusion::prelude::SessionContext; +use exon::ExonSession; use log::{debug, info}; use sequila_core::session_context::{Algorithm, SequilaConfig}; use tokio::runtime::Runtime; @@ -16,7 +16,7 @@ pub(crate) struct QueryParams { pub columns_2: Vec, } pub(crate) fn do_range_operation( - ctx: &SessionContext, + ctx: &ExonSession, rt: &Runtime, range_options: RangeOptions, ) -> datafusion::dataframe::DataFrame { @@ -43,14 +43,20 @@ pub(crate) fn do_range_operation( info!( "Running {} operation with algorithm {} and {} thread(s)...", range_options.range_op, - ctx.state() + ctx.session + .state() .config() .options() .extensions .get::() .unwrap() .interval_join_algorithm, - ctx.state().config().options().execution.target_partitions + ctx.session + .state() + .config() + .options() + .execution + .target_partitions ); match range_options.range_op { RangeOp::Overlap => rt.block_on(do_overlap(ctx, range_options)), @@ -63,7 +69,7 @@ pub(crate) fn do_range_operation( } async fn do_nearest( - ctx: &SessionContext, + ctx: &ExonSession, range_opts: RangeOptions, ) -> datafusion::dataframe::DataFrame { let query = prepare_query(nearest_query, range_opts); @@ -72,14 +78,19 @@ async fn do_nearest( } async fn do_overlap( - ctx: &SessionContext, + ctx: &ExonSession, range_opts: RangeOptions, ) -> datafusion::dataframe::DataFrame { let query = prepare_query(overlap_query, range_opts); debug!("Query: {}", query); debug!( "{}", - 
ctx.state().config().options().execution.target_partitions + ctx.session + .state() + .config() + .options() + .execution + .target_partitions ); ctx.sql(&query).await.unwrap() } diff --git a/src/option.rs b/src/option.rs index afee156..f5077c3 100644 --- a/src/option.rs +++ b/src/option.rs @@ -85,7 +85,51 @@ impl fmt::Display for RangeOp { } } +#[pyclass(eq, eq_int)] +#[derive(Clone, PartialEq, Debug)] pub enum InputFormat { Parquet, Csv, + Bam, + IndexedBam, + Cram, + Vcf, + IndexedVcf, + Fastq, + Fasta, + Bed, + Gff, + Gtf, +} + +#[pyclass(eq, get_all)] +#[derive(Clone, PartialEq, Debug)] +pub struct BioTable { + pub name: String, + pub format: InputFormat, + pub path: String, +} + +// impl BioTable { +// pub +// } + +impl fmt::Display for InputFormat { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let text = match self { + InputFormat::Parquet => "Parquet", + InputFormat::Csv => "CSV", + InputFormat::Bam => "BAM", + InputFormat::Vcf => "VCF", + InputFormat::Fastq => "FASTQ", + InputFormat::Fasta => "FASTA", + InputFormat::Bed => "BED", + InputFormat::Gff => "GFF", + InputFormat::Gtf => "GTF", + InputFormat::IndexedBam => "Indexed BAM", + InputFormat::IndexedVcf => "Indexed VCF", + InputFormat::Cram => "CRAM", + }; + write!(f, "{}", text) + } } diff --git a/src/scan.rs b/src/scan.rs index e1d1f42..a48b6e5 100644 --- a/src/scan.rs +++ b/src/scan.rs @@ -5,12 +5,13 @@ use arrow::error::ArrowError; use arrow::ffi_stream::ArrowArrayStreamReader; use arrow::pyarrow::PyArrowType; use datafusion::datasource::MemTable; -use datafusion::prelude::{CsvReadOptions, ParquetReadOptions, SessionContext}; +use datafusion::prelude::{CsvReadOptions, ParquetReadOptions}; +use exon::ExonSession; use crate::option::InputFormat; pub(crate) fn register_frame( - ctx: &SessionContext, + ctx: &ExonSession, df: PyArrowType, table_name: String, ) { @@ -19,8 +20,10 @@ pub(crate) fn register_frame( .unwrap(); let schema = batches[0].schema(); let table = 
MemTable::try_new(schema, vec![batches]).unwrap(); - ctx.deregister_table(&table_name).unwrap(); - ctx.register_table(&table_name, Arc::new(table)).unwrap(); + ctx.session.deregister_table(&table_name).unwrap(); + ctx.session + .register_table(&table_name, Arc::new(table)) + .unwrap(); } pub(crate) fn get_input_format(path: &str) -> InputFormat { @@ -28,20 +31,23 @@ pub(crate) fn get_input_format(path: &str) -> InputFormat { InputFormat::Parquet } else if path.ends_with(".csv") { InputFormat::Csv + } else if path.ends_with(".bam") { + InputFormat::Bam } else { panic!("Unsupported format") } } pub(crate) async fn register_table( - ctx: &SessionContext, + ctx: &ExonSession, path: &str, table_name: &str, format: InputFormat, -) { - ctx.deregister_table(table_name).unwrap(); +) -> String { + ctx.session.deregister_table(table_name).unwrap(); match format { InputFormat::Parquet => ctx + .session .register_parquet(table_name, path, ParquetReadOptions::new()) .await .unwrap(), @@ -49,9 +55,24 @@ pub(crate) async fn register_table( let csv_read_options = CsvReadOptions::new() //FIXME: expose .delimiter(b',') .has_header(true); - ctx.register_csv(table_name, path, csv_read_options) + ctx.session + .register_csv(table_name, path, csv_read_options) .await .unwrap() }, - } + InputFormat::Bam + | InputFormat::IndexedBam + | InputFormat::Vcf + | InputFormat::IndexedVcf + | InputFormat::Cram + | InputFormat::Fastq + | InputFormat::Fasta + | InputFormat::Bed + | InputFormat::Gff + | InputFormat::Gtf => ctx + .register_exon_table(table_name, path, &*format.to_string()) + .await + .unwrap(), + }; + table_name.to_string() } diff --git a/tests/test_io.py b/tests/test_io.py new file mode 100644 index 0000000..34821ef --- /dev/null +++ b/tests/test_io.py @@ -0,0 +1,10 @@ +import unittest + + +class MyTestCase(unittest.TestCase): + def test_something(self): + self.assertEqual(True, False) # add assertion here + + +if __name__ == "__main__": + unittest.main()