Skip to content

Commit

Permalink
Adding Dataframe tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mwiewior committed Dec 11, 2024
1 parent 7a6007f commit bbdecc7
Show file tree
Hide file tree
Showing 21 changed files with 3,409 additions and 5 deletions.
3,289 changes: 3,289 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

28 changes: 27 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
[build-system]
requires = ["maturin>=1.0,<2.0", "polars>=1.3.0"]
requires = ["poetry-core>=1.0.0", "maturin>=1.0,<2.0", "polars>=1.3.0"]
build-backend = "maturin"

[project]
name = "polars-bio"
version = "0.1.0"
description = "Blazing fast genomic operations on large Python dataframes"
authors = []
requires-python = ">=3.8"
classifiers = [
"Programming Language :: Rust",
Expand All @@ -17,3 +20,26 @@ module-name = "polars_bio"
[[tool.mypy.overrides]]
module = "polars.utils.udfs"
ignore_missing_imports = true

# Poetry package metadata. The description mirrors `[project].description`
# so both metadata tables describe the package identically.
[tool.poetry]
name = "polars-bio"
version = "0.1.0"
description = "Blazing fast genomic operations on large Python dataframes"
authors = ["Marek Wiewiórka <[email protected]>"]
readme = "README.md"

# Runtime dependencies resolved by Poetry.
# NOTE(review): `[project].requires-python` is declared as ">=3.8" while the
# constraint below starts at 3.9 — confirm which lower bound is intended.
[tool.poetry.dependencies]
python = ">=3.9,<3.13"
polars = "^1.16.0"
pandas = "^2.2.3"


# Development-only tooling and test dependencies.
# Declared as a dependency group: `[tool.poetry.dev-dependencies]` is
# deprecated since Poetry 1.2 in favour of `[tool.poetry.group.<name>.dependencies]`.
# NOTE(review): poetry.lock may need refreshing after this rename — verify
# with `poetry check`.
[tool.poetry.group.dev.dependencies]
pytest = "^8.3.3"
pytest-cov = "^6.0.0"
pre-commit = "^4.0.1"
jupyter = "^1.1.0"
ruff = "^0.8.2"
maturin = "^1.7.5"
bioframe = "^0.7.2"
mdpd = "^0.2.1"
42 changes: 42 additions & 0 deletions tests/_expected.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import mdpd
from pathlib import Path

import pandas as pd

# Directory containing this module; fixture paths are built relative to it.
TEST_DIR = Path(__file__).parent
# Sub-directory holding the CSV fixtures read at the bottom of this module.
DATA_DIR = TEST_DIR / "data"
# Expected overlap result rendered as a text table with one row per
# overlapping pair. It is parsed into a DataFrame by `mdpd.from_md` below.
# NOTE(review): every line carries surrounding quotes and a trailing comma —
# confirm that `mdpd.from_md` tolerates this decoration.
EXPECTED_OVERLAP = """
"+--------+-----------+---------+--------+-----------+---------+",
"| contig_1 | pos_start_1 | pos_end_1 | contig_2 | pos_start_2 | pos_end_2 |",
"+--------+-----------+---------+--------+-----------+---------+",
"| chr1 | 150 | 250 | chr1 | 100 | 190 |",
"| chr1 | 150 | 250 | chr1 | 200 | 290 |",
"| chr1 | 190 | 300 | chr1 | 100 | 190 |",
"| chr1 | 190 | 300 | chr1 | 200 | 290 |",
"| chr1 | 300 | 501 | chr1 | 400 | 600 |",
"| chr1 | 500 | 700 | chr1 | 400 | 600 |",
"| chr1 | 15000 | 15000 | chr1 | 10000 | 20000 |",
"| chr1 | 22000 | 22300 | chr1 | 22100 | 22100 |",
"| chr2 | 150 | 250 | chr2 | 100 | 190 |",
"| chr2 | 150 | 250 | chr2 | 200 | 290 |",
"| chr2 | 190 | 300 | chr2 | 100 | 190 |",
"| chr2 | 190 | 300 | chr2 | 200 | 290 |",
"| chr2 | 300 | 500 | chr2 | 400 | 600 |",
"| chr2 | 500 | 700 | chr2 | 400 | 600 |",
"| chr2 | 15000 | 15000 | chr2 | 10000 | 20000 |",
"| chr2 | 22000 | 22300 | chr2 | 22100 | 22100 |",
"+--------+-----------+---------+--------+-----------+---------+",
"""

# Parse the expected table and cast the four coordinate columns to int64 in
# a single pass.
DF_OVERLAP = mdpd.from_md(EXPECTED_OVERLAP).astype(
    {
        "pos_start_1": "int64",
        "pos_end_1": "int64",
        "pos_start_2": "int64",
        "pos_end_2": "int64",
    }
)

# Sort on every column and renumber the index so comparisons against a result
# sorted the same way do not depend on row order.
DF_OVERLAP = DF_OVERLAP.sort_values(
    by=list(DF_OVERLAP.columns),
).reset_index(drop=True)



# Input fixtures for the overlap tests, loaded once at import time.
DF1 = pd.read_csv(DATA_DIR / "reads.csv")
DF2 = pd.read_csv(DATA_DIR / "targets.csv")
Binary file added tests/data/exons/._SUCCESS.crc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Empty file added tests/data/exons/_SUCCESS
Empty file.
Binary file not shown.
Binary file not shown.
Binary file added tests/data/fBrain-DS14718/._SUCCESS.crc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Empty file.
Binary file not shown.
Binary file not shown.
13 changes: 13 additions & 0 deletions tests/data/reads.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
contig,pos_start,pos_end
chr1,150,250
chr1,190,300
chr1,300,501
chr1,500,700
chr1,22000,22300
chr1,15000,15000
chr2,150,250
chr2,190,300
chr2,300,500
chr2,500,700
chr2,22000,22300
chr2,15000,15000
11 changes: 11 additions & 0 deletions tests/data/targets.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
contig,pos_start,pos_end
chr1,100,190
chr1,200,290
chr1,400,600
chr1,10000,20000
chr1,22100,22100
chr2,100,190
chr2,200,290
chr2,400,600
chr2,10000,20000
chr2,22100,22100
Empty file added tests/test_bioframe.py
Empty file.
23 changes: 23 additions & 0 deletions tests/test_pandas_overlap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pandas as pd

import polars_bio as pb
from _expected import DF_OVERLAP, DF1, DF2






class TestOverlapPandas:
    """Checks `pb.overlap` on the pandas fixtures `DF1` and `DF2`."""

    def test_overlap_count(self):
        """The overlap of the two fixtures contains 16 rows."""
        overlaps = pb.overlap(DF1, DF2, output_type="pandas.DataFrame")
        assert len(overlaps) == 16

    def test_overlap_schema_rows(self):
        """Columns and rows match `DF_OVERLAP` once sorted on all columns."""
        actual = pb.overlap(DF1, DF2, output_type="pandas.DataFrame")
        # Sort on every column and renumber the index so the comparison does
        # not depend on the row order of the result.
        sort_keys = list(actual.columns)
        actual = actual.sort_values(by=sort_keys).reset_index(drop=True)
        pd.testing.assert_frame_equal(actual, DF_OVERLAP)




4 changes: 0 additions & 4 deletions tests/test_pig_latinnify.py

This file was deleted.

4 changes: 4 additions & 0 deletions tests/test_polars_overlap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import pandas as pd
import polars_bio.overlap as overlap


0 comments on commit bbdecc7

Please sign in to comment.