Skip to content

Commit

Permalink
Integrate undetected selenium
Browse files Browse the repository at this point in the history
  • Loading branch information
SmartManoj committed Dec 19, 2024
1 parent 3b468f9 commit 07a6e6e
Show file tree
Hide file tree
Showing 13 changed files with 149 additions and 4 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ install-python-dependencies:
poetry run pip install chroma-hnswlib; \
fi

poetry run pip install -r requirements-extra.txt
@if [ -z "${RUN_WITHOUT_DOCKER}" ]; then \
poetry install --without llama-index; \
else \
Expand Down
1 change: 1 addition & 0 deletions containers/app/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ RUN apt-get update -y \
# requirements-extra.txt is copied alongside the Poetry manifests because the
# `pip install -r requirements-extra.txt` step below reads it from the image;
# without the copy that step fails with a missing-file error.
COPY ./pyproject.toml ./poetry.lock ./requirements-extra.txt ./
RUN touch README.md
RUN export POETRY_CACHE_DIR && poetry install --without evaluation,llama-index --no-root && rm -rf $POETRY_CACHE_DIR
RUN poetry run pip install -r requirements-extra.txt

FROM python:3.12.3-slim AS openhands-app

Expand Down
14 changes: 13 additions & 1 deletion openhands/agenthub/codeact_agent/codeact_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,13 +458,25 @@ def _get_messages(self, state: State) -> list[Message]:
"""
if not self.prompt_manager:
raise Exception('Prompt Manager not instantiated.')
if config.use_selenium:
extra_message = '''
You have access to a selenium browser. You can use it using the driver python variable.
Example:
<execute_ipython>
driver.current_url
</execute_ipython>
'''
else:
extra_message = ''
messages: list[Message] = [
Message(
role=system_role,
content=[
TextContent(
text=self.prompt_manager.get_system_message(),
text=self.prompt_manager.get_system_message() + extra_message,
cache_prompt=self.llm.is_caching_prompt_active(),
)
],
Expand Down
3 changes: 3 additions & 0 deletions openhands/core/config/app_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class AppConfig:
file_uploads_restrict_file_types: Whether to restrict upload file types.
file_uploads_allowed_extensions: Allowed file extensions. `['.*']` allows all.
custom_instructions: Custom instructions for the agent.
use_selenium: Whether to use selenium.
"""

llms: dict[str, LLMConfig] = field(default_factory=dict)
Expand Down Expand Up @@ -80,6 +81,8 @@ class AppConfig:
override_UI_settings: bool = False
runloop_api_key: str | None = None
custom_instructions: str = ''
use_selenium: bool = False


defaults_dict: ClassVar[dict] = {}

Expand Down
8 changes: 8 additions & 0 deletions openhands/runtime/impl/eventstream/eventstream_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,14 @@ def __init__(
'debug',
f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}',
)
try:
path = 'sel/selenium_session_details.py'
self.copy_to(path, '/openhands/code/sel/')
path = 'sel/selenium_tester.py'
self.copy_to(path, '/openhands/code/sel/')
logger.info(f'Copied selenium files to runtime')
except Exception as e:
logger.error(f'Error copying selenium files to runtime: {e}')

async def connect(self):
self.send_status_message('STATUS$STARTING_RUNTIME')
Expand Down
3 changes: 2 additions & 1 deletion openhands/runtime/plugins/agent_skills/agentskills.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from inspect import signature

from sel.selenium_tester import driver
from openhands.runtime.plugins.agent_skills import file_ops, file_reader
from openhands.runtime.plugins.agent_skills.utils.dependency import import_functions

Expand Down Expand Up @@ -32,3 +32,4 @@
from openhands.runtime.plugins.agent_skills.file_editor import file_editor # noqa: E402

__all__ += ['file_editor']
__all__ += ['driver']
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
from fuzzywuzzy import fuzz
import arxiv
import os
import requests
from selenium.webdriver.common.by import By
from sel.selenium_tester import driver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from semanticscholar import SemanticScholar

def clean_filename(filename: str):
# remove special characters
filename = re.sub(r'[^\w\s-]', '', filename)
Expand Down Expand Up @@ -43,8 +50,42 @@ def download_arxiv_pdf(query: str):
else:
print("No relevant results found")

def download_pdf_from_url(url: str, name: str = None):
if name is None:
name = url.split('/')[-1]
with open(name, 'wb') as f:
f.write(requests.get(url).content)

def download_semanticscholar_pdf(query: str | None = None, url: str | None = None):
    """Open a Semantic Scholar paper page and download its PDF when hosted on arXiv.

    When ``query`` is given, the first search result's page is used and any
    ``url`` argument is ignored. The page is opened with the shared selenium
    ``driver``; the paper link is then read from the page. Links containing
    "arxiv" are downloaded via ``download_pdf_from_url``; any other link is
    only printed.

    Args:
        query: Search text for Semantic Scholar.
        url: Address of a Semantic Scholar paper page, used when no query
            is given.

    Raises:
        ValueError: If neither ``query`` nor ``url`` is provided.
    """
    from selenium.common.exceptions import WebDriverException

    if query:
        results = SemanticScholar().search_paper(query)
        # Check for an empty result set before touching results[0].
        if results.total == 0:
            print("No results found")
            return
        print(f'{results.total} results.', f'First occurrence: {results[0].title}.')
        url = results[0].url
    if not url:
        raise ValueError('Either query or url must be provided')

    driver.get(url)
    try:
        s = '[data-test-id="cookie-banner__dismiss-btn"]'
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, s))).click()
    except WebDriverException:
        # Best effort: the cookie banner is not always shown.
        pass
    s = '[data-test-id="icon-disclosure"]'
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, s))).click()
    s = '[data-test-id="paper-link"]'
    link = driver.find_element(By.CSS_SELECTOR, s).get_attribute('href')
    if not link:
        print("No paper link found")
        return
    if 'arxiv' in link:
        print(f"Downloading from {link}")
        download_pdf_from_url(link)
    else:
        print(f"Download from {link}")
if __name__ == "__main__":
    # Manual smoke test: fetch one paper through arXiv search, then one PDF
    # directly by URL.
    paper_title = "OpenHands: An Open Platform for AI Software Developers as Generalist Agents"
    download_arxiv_pdf(paper_title)

    # Sample Semantic Scholar page; the call that would use it is disabled.
    semanticscholar_page = 'https://www.semanticscholar.org/paper/1d07e5b6f978cf69c0186f3d5f434fa92d471e46'
    # download_semanticscholar_pdf(url=semanticscholar_page)

    arxiv_pdf = 'https://arxiv.org/pdf/2407.16741.pdf'
    download_pdf_from_url(arxiv_pdf)


3 changes: 2 additions & 1 deletion requirements-extra.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
python-Levenshtein
fuzzywuzzy
arxiv
libcst
libcst
undetected_chromedriver
38 changes: 38 additions & 0 deletions sel/selenium_browser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import undetected_chromedriver as uc
from selenium import webdriver
from selenium.webdriver.common.by import By
import os
# Work from this script's directory so the session file written below is
# created next to it, whatever directory the launcher was started from.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# NOTE: these statements stay at module level on purpose. The launcher scripts
# run this file with `python -i`, presumably so that `driver` remains a live
# global in the interactive session after the script finishes.
if __name__ == '__main__':
    # Initialize Chrome options
    options = webdriver.ChromeOptions()
    options.add_argument('--disable-popup-blocking')
    options.headless = False  # Set to True if headless mode is required

    # Request the performance log. The preference is set on `options` so it
    # is actually sent with the new session, instead of mutating the shared
    # DesiredCapabilities.CHROME dict that was never handed to the driver.
    options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})

    # Launch the browser using undetected_chromedriver
    driver = uc.Chrome(headless=False, use_subprocess=False, options=options)

    # Save session details for reuse
    command_url = driver.command_executor._url
    session_id = driver.session_id

    # The generated text is itself a Python module, so its lines must stay
    # flush-left inside this string.
    session_script = f"""
url = '{command_url}'
session_id = "{session_id}"
"""

    # Print session details
    print(f"Command URL: {command_url}")
    print(f"Session ID: {session_id}")

    # Write session script to a file
    session_file = 'selenium_session_details.py'
    with open(session_file, 'w') as fh:
        fh.write(session_script)

    print(f"Session details saved to: {session_file}")
2 changes: 2 additions & 0 deletions sel/selenium_session_details.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Connection details of a running browser session, in the format written by
# sel/selenium_browser.py and imported by sel/selenium_tester.py.
# NOTE(review): these look like values from one particular browser launch and
# are presumably overwritten on every run — confirm this file should be tracked.
# Command-executor URL of the driver that owns the session.
url = 'http://localhost:57072'
# Identifier of the WebDriver session to attach to.
session_id = "4dcc81cc2c4fc962e6a0dc38882092cf"
34 changes: 34 additions & 0 deletions sel/selenium_tester.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from selenium import webdriver

from selenium.webdriver.remote import remote_connection
from selenium.webdriver.remote.command import Command
class SessionRemote(webdriver.Remote):
    """Remote WebDriver whose ``start_session`` sends nothing to the server.

    Presumably used so that constructing the driver does not open a new
    browser session; the caller assigns an existing ``session_id`` afterwards.
    """

    name = 'chrome'

    def start_session(self, desired_capabilities, browser_profile=None):
        """Mark the driver as W3C-compliant without creating a session.

        Both arguments are accepted for signature compatibility and ignored.
        """
        # Store the flag on the instance; a bare local assignment would be
        # discarded as soon as this method returns.
        self.w3c = True


def create_driver(url, session_id):
    """Build a ``SessionRemote`` bound to an already-running browser session.

    Args:
        url: Command-executor URL of the running driver.
        session_id: Identifier of the session to attach to.

    Returns:
        A ``SessionRemote`` whose ``session_id`` is set to the given value.
    """
    connection = remote_connection.RemoteConnection(url)
    # Register the file-upload endpoint in this connection's command table.
    connection._commands[Command.UPLOAD_FILE] = ("POST", "/session/$sessionId/file")
    attached = SessionRemote(
        command_executor=connection,
        options=webdriver.ChromeOptions(),
    )
    attached.session_id = session_id
    return attached

from sel.selenium_session_details import url,session_id
# Module-level driver attached to the session described in
# sel/selenium_session_details.py; created as soon as this module is imported.
driver = create_driver(url, session_id)

## import selenium keys
if __name__ == '__main__':
    # Manual check against the attached browser: print the current page URL
    # and the href of its paper link.
    from selenium.webdriver.common.by import By

    print(driver.current_url)
    disclosure_selector = '[data-test-id="icon-disclosure"]'
    # click on the element
    # driver.find_element(By.CSS_SELECTOR, disclosure_selector).click()
    paper_link_selector = '[data-test-id="paper-link"]'
    href = driver.find_element(By.CSS_SELECTOR, paper_link_selector).get_attribute('href')
    print(href)


2 changes: 2 additions & 0 deletions sel/start_selenium.cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
@echo off
rem Start the selenium browser and keep an interactive Python prompt open (-i).
rem %~dp0 is this script's own directory, so the launcher works from any
rem working directory instead of only from the repository root.
python -i "%~dp0selenium_browser.py"
1 change: 1 addition & 0 deletions sel/start_selenium.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#!/bin/sh
# Start the selenium browser and keep an interactive Python prompt open (-i).
# The script path is resolved relative to this launcher so it works from any
# working directory instead of only from the repository root.
python3.12 -i "$(dirname "$0")/selenium_browser.py"

0 comments on commit 07a6e6e

Please sign in to comment.