BrowserGym
BrowserGym copied to clipboard
Problem in click action/PDF interaction
It seems the click action does not work when the agent wants to click on "View PDF" for an arXiv article. Here is a sample script (parse_content_to_elements and find_matching_anchor are borrowed from here):
import re
import browsergym.core # register the openended task as a gym environment
from browsergym.utils.obs import flatten_axtree_to_str
from dataclasses import dataclass, field
def parse_content_to_elements(content: str) -> dict:
    """Parse the observation content into a dictionary mapping anchors to their descriptions.

    A line whose stripped text starts with ``[<digits>]`` opens a new element:
    the digits become the anchor (kept as a string) and the rest of the line
    becomes the start of its description. Every following non-blank line that
    is not itself an anchor line is appended to the current description, with
    the pieces joined by single spaces.

    Blank lines are skipped, and lines that appear before the first anchor are
    dropped. If the same anchor occurs more than once, the last occurrence wins.

    Args:
        content: Multi-line text to scan.

    Returns:
        Dict mapping anchor id strings to their description strings, in order
        of first appearance.
    """
    # Compiled once instead of on every loop iteration.
    anchor_pattern = re.compile(r'\[(\d+)\](.*)')

    elements = {}
    current_anchor = None
    description_lines = []

    for line in content.split('\n'):
        line = line.strip()
        if not line:
            continue

        # Check for anchor line
        anchor_match = anchor_pattern.match(line)
        if anchor_match:
            # Save previous element if it exists
            if current_anchor and description_lines:
                elements[current_anchor] = ' '.join(description_lines)
            # Start new element
            current_anchor = anchor_match.group(1)
            description_lines = [anchor_match.group(2).strip()]
        elif current_anchor:
            # Continuation line: only kept once an anchor has been seen
            description_lines.append(line)

    # Save last element
    if current_anchor and description_lines:
        elements[current_anchor] = ' '.join(description_lines)

    return elements
def find_matching_anchor(content: str, selector: str):
    """Find the anchor ID that matches the given selector description.

    The match is a case-insensitive substring test of ``selector`` against
    each element description produced by ``parse_content_to_elements``; the
    first matching element (in order of appearance) wins.

    Args:
        content: Multi-line text to search, in the format accepted by
            ``parse_content_to_elements``.
        selector: Text to look for inside an element description.

    Returns:
        The anchor id (a string) of the first matching element, or ``None``
        when nothing matches or when ``selector`` is empty/whitespace-only.
    """
    # Clean up selector and create a pattern
    selector = selector.lower().strip()
    if not selector:
        # An empty string is a substring of every description, which would
        # silently select the first element on the page.
        return None

    elements = parse_content_to_elements(content)
    for anchor, description in elements.items():
        if selector in description.lower().strip():
            return anchor
    return None
if __name__ == '__main__':
    # The script calls gym.make() but never imported gym, which raises a
    # NameError. Imported here so this block is runnable on its own.
    import gymnasium as gym

    env = gym.make(
        "browsergym/openended",
        task_kwargs={"start_url": "https://www.google.com/"},  # starting URL
        wait_for_user_message=False,  # wait for a user message after each agent message sent to the chat
    )
    try:
        # run the environment <> agent loop until termination
        obs, info = env.reset()

        action0 = 'goto("https://arxiv.org/abs/1706.03762")'
        obs, reward, terminated, truncated, info = env.step(action0)
        print(obs["url"])

        # Give the page some time to finish loading before reading the tree.
        action1 = "noop(2000)"
        obs, reward, terminated, truncated, info = env.step(action1)
        print(obs["url"])

        extra_element_properties = {}
        axtree_text = flatten_axtree_to_str(
            obs["axtree_object"],
            extra_properties=extra_element_properties,
            with_clickable=True,
            skip_generic=True,
            filter_visible_only=True,
        )
        select = find_matching_anchor(axtree_text, "link 'View PDF',")
        if select is None:
            # Without this guard the script would send click("None", "left"),
            # which hides the real problem (the link was not found).
            raise SystemExit("Could not find the 'View PDF' link in the accessibility tree")

        action2 = f'click("{select}", "left")'
        print(action2)
        obs, reward, terminated, truncated, info = env.step(action2)
        print(obs["url"])
    finally:
        # release the environment, even if a step above failed
        env.close()
The output is as follows:
https://arxiv.org/abs/1706.03762
https://arxiv.org/abs/1706.03762
https://arxiv.org/abs/1706.03762
We can see that after the click action the URL does not change, whereas we expect to be redirected to https://arxiv.org/pdf/1706.03762. I've tested this on a couple of arXiv articles and it did not work for any of them.