Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use action coord to help Element path exact match #30

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 18 additions & 18 deletions evaluate/evaluate_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,9 @@ def get_netloc(url: str) -> str:
return netloc


async def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_value=None, text_content=None):
async def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_value=None, text_content=None,input_coords=None):
# input_coords should be (x,y) in pixels, if not None
# and will be used in ElementEvaluator.path_exact_match()
"""Evaluate step score"""
step_score = 0
match_result = []
Expand All @@ -135,15 +137,12 @@ async def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_
elif match_function == "url_semantic_match":
score = await URLEvaluator.url_semantic_match(
page.url, evaluate["reference_answer"], evaluate["key"])
# print(score, "url_semantic_match")
elif match_function == "element_path_exactly_match":
input_netloc = get_netloc(page.url)
method = evaluate["method"]
score = ElementEvaluator.path_exact_match(
input_path, evaluate["reference_answer"], method, await page.content(), input_netloc,
evaluate["netloc"])
# print(score, "path_exact_match:", input_path,
# "***", evaluate["reference_answer"])
score = await ElementEvaluator.path_exact_match(
input_path, evaluate["reference_answer"], method, page, input_netloc,
evaluate["netloc"],input_coords=input_coords)
elif match_function == "element_path_included_match":
pass
# * Temporarily not doing
Expand All @@ -155,9 +154,9 @@ async def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_
# print(element_value)
# print(await page.locator(input_path).input_value())
if "path" in evaluate.keys():
path_score = ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector",
await page.content(), input_netloc,
evaluate["netloc"])
path_score = await ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector",
page, input_netloc,
evaluate["netloc"],input_coords=input_coords)
if path_score == 0:
# print("Path mismatch in value evaluation")
score = 0
Expand All @@ -172,12 +171,12 @@ async def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_
else:
score = 0
elif match_function == "element_value_included_match":
if input_path is not None and element_value is not None:
if (input_path is not None or input_coords is not None) and element_value is not None:
input_netloc = get_netloc(page.url)
if "path" in evaluate.keys():
path_score = ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector",
await page.content(), input_netloc,
evaluate["netloc"])
path_score = await ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector",
page, input_netloc,
evaluate["netloc"],input_coords=input_coords)
if path_score == 0:
# print("Path mismatch in value evaluation")
score = 0
Expand All @@ -192,14 +191,14 @@ async def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_
else:
score = 0
elif match_function == "element_value_semantic_match":
if input_path is not None and element_value is not None:
if (input_path is not None or input_coords is not None) and element_value is not None:
input_netloc = get_netloc(page.url)

if len(element_value) > 0:
if "path" in evaluate.keys():
path_score = ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector",
await page.content(), input_netloc,
evaluate["netloc"])
path_score = await ElementEvaluator.path_exact_match(input_path, evaluate["path"], "selector",
page, input_netloc,
evaluate["netloc"],input_coords=input_coords)
if path_score == 0:
# print("Path mismatch in value evaluation")
score = 0
Expand Down Expand Up @@ -247,6 +246,7 @@ async def step_evaluate(page: Page, evaluate_steps=[], input_path=None, element_
return evaluate_steps, match_result



def parse_current_trace(response: dict, env: AsyncHTMLEnvironment, step_reward: dict):
thought = response["description"].get("thought")
action_type = response.get(
Expand Down
46 changes: 25 additions & 21 deletions evaluate/step_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,34 @@ async def url_semantic_match(input_url, semantic_method, key=False):
class ElementEvaluator(StepEvaluator):
'''Element evaluation and scoring'''
@staticmethod
def path_exact_match(input_answer, reference_answer, method, html_content, input_netloc, reference_netloc):
def is_same_element(page, input_coord, reference_element_handle):
    """Return True when a pixel coordinate lies inside the reference element's box.

    Args:
        page: Not used by this check; kept so existing call sites keep working.
        input_coord: ``(x, y)`` pair to test against the bounding box.
        reference_element_handle: Object exposing a *synchronous*
            ``bounding_box(timeout=...)`` call that returns a mapping with
            ``x``, ``y``, ``width`` and ``height`` keys, or a falsy value when
            no box is available.

    Returns:
        bool: True if the point is inside the box (edges inclusive),
        otherwise False. A missing bounding box yields False.

    Raises:
        TypeError: If ``bounding_box()`` hands back an awaitable. This
            function is synchronous and cannot resolve it.
    """
    import inspect  # local import: only needed for the awaitable guard below

    x, y = input_coord
    # Usually 2s is enough to obtain the bounding box, but allow up to 5s.
    box = reference_element_handle.bounding_box(timeout=5000)

    if inspect.isawaitable(box):
        # Without this guard the code below fails with an opaque
        # "'coroutine' object is not subscriptable" and leaves a
        # never-awaited coroutine behind.
        # NOTE(review): the surrounding code uses `await page.content()`, so
        # the handle is presumably an async Playwright locator. If so, this
        # helper and its caller need to become `async def` and await the
        # bounding box; confirm and convert the whole call chain together.
        close = getattr(box, "close", None)
        if close is not None:
            close()  # suppress the "coroutine was never awaited" warning
        raise TypeError(
            "bounding_box() returned an awaitable; is_same_element() is "
            "synchronous and cannot await it"
        )

    if not box:
        # No box available for the reference element: nothing can match.
        return False

    left, top = box["x"], box["y"]
    inside_horizontally = left <= x <= left + box["width"]
    inside_vertically = top <= y <= top + box["height"]
    return inside_horizontally and inside_vertically


@staticmethod
def path_exact_match(input_answer, reference_answer, method, page, input_netloc, reference_netloc,input_coords=None):
# input_coords, when not None, should be an (x, y) position in pixels;
# the "selector" branch passes it to ElementEvaluator.is_same_element()
score = 0
if method == "xpath":
if reference_netloc != input_netloc:
# print("reference_netloc:", reference_netloc,
# "input_netloc:", input_netloc)
return 0
try:
html_content = await page.content()
tree = html.fromstring(html_content)
input_elements = tree.xpath(input_answer)
reference_elements = tree.xpath(reference_answer)
Expand All @@ -105,32 +125,16 @@ def path_exact_match(input_answer, reference_answer, method, html_content, input
pass
else:
score = 0
elif method == "selector":
elif method == "selector": #modified to use coords
if reference_netloc != input_netloc:
# print("reference_netloc:", reference_netloc,
# "input_netloc:", input_netloc)
return 0
try:
soup = BeautifulSoup(html_content, 'html.parser')
input_element = soup.select_one(input_answer)
reference_element = soup.select_one(reference_answer)
input_element = input_coords#input element is input coord
reference_element = page.locator(reference_answer)
if (input_element is not None) and (reference_element is not None):
score = input_element is reference_element

try:
if reference_element.name in MapTagNameList:
# parent_elements = reference_element.parent
# score_parent = input_element is parent_elements
# score = max(score, score_parent)
trace_up_count = 0
current_element = reference_element
while trace_up_count < 3 and score == 0:
trace_up_count += 1
current_element = current_element.parent
score_parent = input_element is current_element
score = max(score, score_parent)
except:
pass
score = ElementEvaluator().is_same_element(page, input_coord=input_element, reference_element_handle=reference_element)
except:
score = 0
# result_score = MatchFunction.include_match(
Expand Down