-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
tests: theyworkforyou strategies & examples
- Loading branch information
1 parent
70a5e25
commit 51e0080
Showing
2 changed files
with
340 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,242 @@ | ||
"""Test strategies for the `Debates` class.""" | ||
|
||
import re | ||
import string | ||
|
||
from bs4 import BeautifulSoup, NavigableString, Tag | ||
from hypothesis import provisional | ||
from hypothesis import strategies as st | ||
|
||
from ...common import GOV_DEPARTMENTS, MPS_SAMPLE, ST_DATES, ST_FREE_TEXT | ||
|
||
|
||
@st.composite | ||
def st_title_blocks(draw, date=None): | ||
"""Create text for a title block in a parliamentary entry.""" | ||
|
||
if date is None: | ||
date = draw(ST_DATES) | ||
|
||
title = draw(ST_FREE_TEXT) | ||
extra = draw(ST_FREE_TEXT) | ||
|
||
block = ": ".join((title, date.strftime("%d %b %Y"), extra)) | ||
|
||
return block | ||
|
||
|
||
@st.composite | ||
def st_indices(draw, date=None): | ||
"""Create an index for a parliamentary entry.""" | ||
|
||
if date is None: | ||
date = draw(ST_DATES) | ||
|
||
prefix = draw(st.text(alphabet="abc", max_size=1)) | ||
body = draw(st.integers(0, 10).map(str)) | ||
|
||
idx = ".".join((date.strftime("%Y-%m-%d"), prefix, body, "h")) | ||
|
||
return idx | ||
|
||
|
||
@st.composite | ||
def st_metadatas(draw): | ||
"""Create a metadata block for our parliamentary summary tests.""" | ||
|
||
date = draw(ST_DATES) | ||
block = draw(st_title_blocks(date)) | ||
idx = draw(st_indices(date)) | ||
|
||
cat = draw(st.sampled_from(("lords", "debates", "whall"))) | ||
url = "/".join((draw(provisional.urls()), cat, f"?id={idx}")) | ||
|
||
return block, date, idx, cat, url | ||
|
||
|
||
@st.composite | ||
def st_lead_metadatas(draw): | ||
"""Create a lead block for a written answer test.""" | ||
|
||
date = draw(ST_DATES) | ||
recipient = draw(st.sampled_from(GOV_DEPARTMENTS)) | ||
|
||
lead = ( | ||
f"{recipient} written question " | ||
f"- answered on {date.strftime('%d %B %Y')}" | ||
) | ||
|
||
return lead, recipient, date | ||
|
||
|
||
@st.composite | ||
def st_speeches(draw): | ||
"""Create a speech and its details for a parliamentary test.""" | ||
|
||
speaker, position, url = draw(st.sampled_from(MPS_SAMPLE)) | ||
speech = draw(ST_FREE_TEXT) | ||
|
||
return speech, speaker, position, url | ||
|
||
|
||
@st.composite | ||
def st_daily_boards(draw): | ||
"""Create some HTML soup to simulate a daily board.""" | ||
|
||
date = draw(st.dates()).strftime("%Y-%m-%d") | ||
url = f"https://theyworkforyou.com/debates/?d={date}" | ||
|
||
st_href = st.text( | ||
string.digits + string.ascii_letters, min_size=1, max_size=5 | ||
).map(lambda x: f"/debates/{x}.h") | ||
|
||
hrefs = draw(st.lists(st_href, min_size=1, max_size=10)) | ||
tags = [ | ||
f'<a href={href} class="business-list__title"></a>' for href in hrefs | ||
] | ||
soup = BeautifulSoup("\n".join(tags), "html.parser") | ||
|
||
return url, hrefs, soup | ||
|
||
|
||
def extract_href(url): | ||
"""Extract just the hyperlink reference from a URL.""" | ||
|
||
match = re.search(r"(?<=.com)\/\w+\/\d+(?=\/)", url) | ||
|
||
if match is None: | ||
return url | ||
|
||
return match.group() | ||
|
||
|
||
def format_speech_block(name, pos, href, text): | ||
"""Get a speech block into HTML format.""" | ||
|
||
html = '<div class="debate-speech__speaker-and-content">' | ||
html += '<h2 class="debate-speech__speaker">' | ||
html += f'<a href="{href}">' | ||
html += f'<strong class="debate-speech__speaker__name">{name}</strong>' | ||
html += f'<small class="debate-speech__speaker__position">{pos}</small>' | ||
html += "</a>" | ||
html += "</h2>" | ||
html += f'<div class="debate-speech__content"><p>{text}</p></div>' | ||
html += "</div>" | ||
|
||
return html | ||
|
||
|
||
@st.composite | ||
def st_speech_soups(draw): | ||
"""Create some HTML soup for a speech block.""" | ||
|
||
text, name, pos, url = draw(st_speeches()) | ||
href = extract_href(url) | ||
html = format_speech_block(name, pos, href, text) | ||
|
||
return BeautifulSoup(html, "html.parser"), name, pos, href, text | ||
|
||
|
||
@st.composite | ||
def st_debate_soups(draw): | ||
"""Create some HTML soup for a debate page.""" | ||
|
||
speakers = draw( | ||
st.lists( | ||
st.sampled_from(MPS_SAMPLE), | ||
min_size=2, | ||
max_size=10, | ||
unique=True, | ||
) | ||
) | ||
|
||
names, positions, hrefs, texts = [], [], [], [] | ||
html = "" | ||
for name, pos, url in speakers: | ||
href = extract_href(url) | ||
text = draw(ST_FREE_TEXT) | ||
names.append(name) | ||
positions.append(pos) | ||
hrefs.append(href) | ||
texts.append(text) | ||
|
||
html += format_speech_block(name, pos, href, text) | ||
|
||
return BeautifulSoup(html, "html.parser"), names, positions, hrefs, texts | ||
|
||
|
||
@st.composite | ||
def st_tags(draw): | ||
"""Create a tag for processing.""" | ||
|
||
name = draw(st.sampled_from(("a", "h1", "h2", "strong", "small", "p"))) | ||
text = draw(st.text(string.ascii_letters + string.digits, min_size=1)) | ||
|
||
tag = Tag(name=name) | ||
tag.insert(0, NavigableString(text)) | ||
|
||
return tag, text | ||
|
||
|
||
@st.composite | ||
def st_entry_urls( | ||
draw, categories=("debates", "lords", "whall", "wms", "senedd", "sp", "ni") | ||
): | ||
"""Create a realistic URL for an entry.""" | ||
|
||
category = draw(st.sampled_from(categories)) | ||
date = draw(ST_DATES) | ||
idx = draw(st.uuids().map(str)) | ||
index = f"?id={date}.{idx}" | ||
|
||
elements = filter(None, (category, index)) | ||
|
||
return "/".join(("https://theyworkforyou.com", *elements)) | ||
|
||
|
||
@st.composite | ||
def st_debate_transcripts(draw, max_size=10): | ||
"""Create a transcript dictionary for a debate.""" | ||
|
||
speakers = draw( | ||
st.lists(st.sampled_from(MPS_SAMPLE), min_size=2, max_size=max_size) | ||
) | ||
|
||
speeches = [] | ||
for name, position, url in speakers: | ||
text = draw(ST_FREE_TEXT) | ||
speech = { | ||
"name": name, | ||
"position": position, | ||
"url": url, | ||
"text": text, | ||
"response": text, | ||
} | ||
speeches.append(speech) | ||
|
||
transcript = { | ||
"title": draw(ST_FREE_TEXT), | ||
"url": draw(provisional.urls()), | ||
"speeches": speeches, | ||
} | ||
|
||
return transcript | ||
|
||
|
||
@st.composite | ||
def st_written_transcripts(draw): | ||
"""Create a transcript dictionary for a written answer entry.""" | ||
|
||
transcript = draw(st_debate_transcripts(max_size=3)) | ||
|
||
*questions, answer = transcript.pop("speeches") | ||
transcript["questions"] = questions | ||
transcript["answer"] = answer | ||
|
||
transcript["recipient"] = draw(st.sampled_from(GOV_DEPARTMENTS)) | ||
|
||
date = draw(st.dates()).isoformat() | ||
transcript["date"] = date | ||
transcript["answered"] = date | ||
|
||
return transcript |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
"""Example regression tests for the written answers reader.""" | ||
|
||
import requests | ||
from bs4 import BeautifulSoup | ||
|
||
from parliai_public.readers import WrittenAnswers | ||
|
||
|
||
def test_read_metadata_from_lead_2024_02_29_16305(): | ||
"""Test the lead extractor on entry 2024-02-29.16305.""" | ||
|
||
url = "https://theyworkforyou.com/wrans/?id=2024-02-29.16305.h" | ||
page = requests.get(url) | ||
soup = BeautifulSoup(page.content, "html.parser") | ||
|
||
recipient, on = WrittenAnswers._read_metadata_from_lead(soup) | ||
|
||
assert recipient == "Northern Ireland Office" | ||
assert on == "2024-03-06" | ||
|
||
|
||
def test_answer_does_not_mention_terms_2024_02_19_HL2510(): | ||
""" | ||
Test the output of entry 2024-02-19.HL2510 is parsed right. | ||
The answer to this written question does not mention the ONS, so we | ||
assert that the parsed response reflects that. Meanwhile, the | ||
question *does* mention the ONS. | ||
""" | ||
|
||
url = "https://www.theyworkforyou.com/wrans/?id=2024-02-19.HL2510.h" | ||
tool = WrittenAnswers.from_toml() | ||
|
||
transcript = tool.read(url) | ||
assert isinstance(transcript, dict) | ||
assert len(transcript["questions"]) == 1 | ||
|
||
question = transcript["questions"][0] | ||
assert tool.check_contains_terms(question["text"]) | ||
|
||
answer = transcript["answer"] | ||
assert not tool.check_contains_terms(answer["text"]) | ||
|
||
output = tool.render(transcript) | ||
answerer = ( | ||
"[Lord Offord of Garvel](https://theyworkforyou.com/peer/?p=26052)" | ||
" (Parliamentary Under Secretary of State" | ||
" (Department for Business and Trade))" | ||
) | ||
assert f"### Answered by {answerer}" in output | ||
assert output.endswith("Answer does not mention any search terms.") | ||
|
||
|
||
def test_multiple_questions_rendering_2024_03_20_19670(): | ||
"""Test for multiple questions like in entry 2024-03-20.19670.""" | ||
|
||
url = "https://www.theyworkforyou.com/wrans/?id=2024-03-20.19670.h" | ||
tool = WrittenAnswers.from_toml() | ||
|
||
transcript = tool.read(url) | ||
assert isinstance(transcript, dict) | ||
|
||
questions = transcript["questions"] | ||
answer = transcript["answer"] | ||
assert len(questions) == 2 | ||
assert isinstance(answer, dict) | ||
|
||
output = tool.render(transcript) | ||
for question in questions: | ||
assert question["name"] in output | ||
assert question["position"] in output | ||
assert question["url"] in output | ||
|
||
|
||
def test_pick_up_answer_block_2024_03_27_HL3698(): | ||
""" | ||
Test the output of entry 2024-03-27.HL3698. | ||
Taken from issue #53, the answer block doesn't get rendered. | ||
""" | ||
|
||
url = "https://www.theyworkforyou.com/wrans/?id=2024-03-27.HL3698.h" | ||
tool = WrittenAnswers.from_toml() | ||
|
||
transcript = tool.read(url) | ||
assert isinstance(transcript, dict) | ||
assert len(transcript["questions"]) == 1 | ||
|
||
answer = transcript["answer"] | ||
assert tool.check_contains_terms(answer["text"]) | ||
|
||
output = tool.render(transcript) | ||
answerer = ( | ||
"[Baroness Vere of Norbiton](https://theyworkforyou.com/peer/?p=25587)" | ||
" (The Parliamentary Secretary, HM Treasury)" | ||
) | ||
assert f"### Answered by {answerer}" in output | ||
assert not output.endswith(answerer) |