Integrate undetected selenium

SmartManoj · Dec 19, 2024 · 07a6e6e · 07a6e6e
1 parent 3b468f9
commit 07a6e6e
Show file tree

Hide file tree

Showing 13 changed files with 149 additions and 4 deletions.
diff --git a/Makefile b/Makefile
@@ -138,6 +138,7 @@ install-python-dependencies:
 		poetry run pip install chroma-hnswlib; \
 	fi
 
+	poetry run pip install -r requirements-extra.txt
 	@if [ -z "${RUN_WITHOUT_DOCKER}" ]; then \
 		poetry install --without llama-index; \
 	else \

diff --git a/containers/app/Dockerfile b/containers/app/Dockerfile
@@ -27,6 +27,7 @@ RUN apt-get update -y \
 COPY ./pyproject.toml ./poetry.lock ./
 RUN touch README.md
 RUN export POETRY_CACHE_DIR && poetry install --without evaluation,llama-index --no-root && rm -rf $POETRY_CACHE_DIR
+# The COPY above only brings in pyproject.toml and poetry.lock, so add the
+# extra requirements file to the image before installing from it.
+COPY ./requirements-extra.txt ./
+RUN poetry run pip install -r requirements-extra.txt
 
 FROM python:3.12.3-slim AS openhands-app
 

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -458,13 +458,25 @@ def _get_messages(self, state: State) -> list[Message]:
         """
         if not self.prompt_manager:
             raise Exception('Prompt Manager not instantiated.')
+        if config.use_selenium:
+            extra_message = '''
 
+You have access to a selenium browser. You can use it using the driver python variable.
+
+Example:
+<execute_ipython>
+driver.current_url
+</execute_ipython>
+
+'''
+        else:
+            extra_message = ''
         messages: list[Message] = [
             Message(
                 role=system_role,
                 content=[
                     TextContent(
-                        text=self.prompt_manager.get_system_message(),
+                        text=self.prompt_manager.get_system_message() + extra_message,
                         cache_prompt=self.llm.is_caching_prompt_active(),
                     )
                 ],

diff --git a/openhands/core/config/app_config.py b/openhands/core/config/app_config.py
@@ -45,6 +45,7 @@ class AppConfig:
         file_uploads_restrict_file_types: Whether to restrict upload file types.
         file_uploads_allowed_extensions: Allowed file extensions. `['.*']` allows all.
         custom_instructions: Custom instructions for the agent.
+        use_selenium: Whether to use selenium.
     """
 
     llms: dict[str, LLMConfig] = field(default_factory=dict)
@@ -80,6 +81,8 @@ class AppConfig:
     override_UI_settings: bool = False
     runloop_api_key: str | None = None
     custom_instructions: str = ''
+    use_selenium: bool = False
+
 
     defaults_dict: ClassVar[dict] = {}
 

diff --git a/openhands/runtime/impl/eventstream/eventstream_runtime.py b/openhands/runtime/impl/eventstream/eventstream_runtime.py
@@ -170,6 +170,14 @@ def __init__(
                 'debug',
                 f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}',
             )
+        try:
+            path = 'sel/selenium_session_details.py'
+            self.copy_to(path, '/openhands/code/sel/')
+            path = 'sel/selenium_tester.py'
+            self.copy_to(path, '/openhands/code/sel/')
+            logger.info(f'Copied selenium files to runtime')
+        except Exception as e:
+            logger.error(f'Error copying selenium files to runtime: {e}')
 
     async def connect(self):
         self.send_status_message('STATUS$STARTING_RUNTIME')

diff --git a/openhands/runtime/plugins/agent_skills/agentskills.py b/openhands/runtime/plugins/agent_skills/agentskills.py
@@ -1,5 +1,5 @@
 from inspect import signature
-
+from sel.selenium_tester import driver
 from openhands.runtime.plugins.agent_skills import file_ops, file_reader
 from openhands.runtime.plugins.agent_skills.utils.dependency import import_functions
 
@@ -32,3 +32,4 @@
 from openhands.runtime.plugins.agent_skills.file_editor import file_editor  # noqa: E402
 
 __all__ += ['file_editor']
+__all__ += ['driver']
diff --git a/openhands/runtime/plugins/agent_skills/file_ops/academic_utils.py b/openhands/runtime/plugins/agent_skills/file_ops/academic_utils.py
@@ -2,6 +2,13 @@
 from fuzzywuzzy import fuzz
 import arxiv
 import os
+import requests
+from selenium.webdriver.common.by import By
+from sel.selenium_tester import driver
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from semanticscholar import SemanticScholar
+
 def clean_filename(filename: str):
     # remove special characters
     filename = re.sub(r'[^\w\s-]', '', filename)
@@ -43,8 +50,42 @@ def download_arxiv_pdf(query: str):
     else:
         print("No relevant results found")
 
+def download_pdf_from_url(url: str, name: str | None = None):
+    """Download the resource at ``url`` and save it to a local file.
+
+    Args:
+        url: Address of the file to fetch.
+        name: Destination path. Defaults to the last path segment of ``url``.
+
+    Raises:
+        ValueError: If ``name`` is omitted and none can be derived from ``url``.
+        requests.HTTPError: If the server answers with an error status.
+    """
+    if name is None:
+        # Ignore a trailing slash so the derived file name is never empty.
+        name = url.rstrip('/').split('/')[-1]
+        if not name:
+            raise ValueError(f'Cannot derive a file name from {url!r}')
+    # Fetch before opening the file so a failed request leaves nothing on disk.
+    # The timeout keeps a stalled server from blocking the caller forever.
+    response = requests.get(url, timeout=60)
+    # Fail loudly instead of saving an HTML error page under a PDF name.
+    response.raise_for_status()
+    with open(name, 'wb') as f:
+        f.write(response.content)
+
+def download_semanticscholar_pdf(query: str | None = None, url: str | None = None):
+    """Open a Semantic Scholar paper page and download its PDF when the link is on arXiv.
+
+    Args:
+        query: Search text; when given, the page of the first search hit is used.
+        url: Paper page to open directly when no ``query`` is given.
+
+    Raises:
+        ValueError: If neither ``query`` nor ``url`` is provided.
+    """
+    # Local import so the handler below can name Selenium's error base class.
+    from selenium.common.exceptions import WebDriverException
+
+    if not query and not url:
+        raise ValueError('Either query or url must be provided')
+
+    sch = SemanticScholar()
+    if query:
+        results = sch.search_paper(query)
+        # Check for an empty result set before touching results[0].
+        if results.total == 0:
+            print("No results found")
+            return
+        print(f'{results.total} results.', f'First occurrence: {results[0].title}.')
+        url = results[0].url
 
+    driver.get(url)
+    try:
+        s = '[data-test-id="cookie-banner__dismiss-btn"]'
+        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, s))).click()
+    except WebDriverException:
+        # Best effort: carry on when the banner cannot be dismissed
+        # (for example, it never becomes clickable within the wait).
+        pass
+    s = '[data-test-id="icon-disclosure"]'
+    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, s))).click()
+    s = '[data-test-id="paper-link"]'
+    link = driver.find_element(By.CSS_SELECTOR, s).get_attribute('href')
+    # get_attribute returns None when the element carries no href.
+    if link and 'arxiv' in link:
+        print(f"Downloading from {link}")
+        download_pdf_from_url(link)
+    else:
+        print(f"Download from {link}")
+
+
 if __name__ == "__main__":  
     query = "OpenHands: An Open Platform for AI Software Developers as Generalist Agents"
-    download_arxiv_pdf(query)
+    url = 'https://www.semanticscholar.org/paper/1d07e5b6f978cf69c0186f3d5f434fa92d471e46'
+    # download_semanticscholar_pdf(url=url)
+    url = 'https://arxiv.org/pdf/2407.16741.pdf'
+    download_pdf_from_url(url)
+
 
diff --git a/requirements-extra.txt b/requirements-extra.txt
@@ -1,4 +1,5 @@
 python-Levenshtein
 fuzzywuzzy
 arxiv
-libcst
+libcst
+undetected_chromedriver
diff --git a/sel/selenium_browser.py b/sel/selenium_browser.py
@@ -0,0 +1,38 @@
+import undetected_chromedriver as uc
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+import os
+# Work from this script's own directory so the relative session file written
+# below is created next to it. Runs on import too, not only under __main__.
+os.chdir(os.path.dirname(os.path.abspath(__file__)))
+
+if __name__ == '__main__':
+    # Initialize Chrome options
+    options = webdriver.ChromeOptions()
+    options.add_argument('--disable-popup-blocking')
+    options.headless = False  # Set to True if headless mode is required
+
+    # Desired capabilities for logging
+    # NOTE(review): `capabilities` is not passed to uc.Chrome() below, so these
+    # prefs may not reach the launched browser — confirm.
+    capabilities = webdriver.DesiredCapabilities().CHROME
+    capabilities["goog:loggingPrefs"] = {"performance": "ALL"}
+
+    # Launch the browser using undetected_chromedriver
+    driver = uc.Chrome(headless=False, use_subprocess=False, options=options)
+
+    # Save session details for reuse
+    # (`_url` is an underscore-prefixed attribute of the command executor.)
+    command_url = driver.command_executor._url
+    session_id = driver.session_id
+
+    # Source text of a small Python module that defines `url` and `session_id`.
+    session_script = f"""
+url = '{command_url}'
+session_id = "{session_id}"
+"""
+
+    # Print session details
+    print(f"Command URL: {command_url}")
+    print(f"Session ID: {session_id}")
+
+    # Write session script to a file
+    # (relative path: resolved against the directory set by os.chdir above)
+    session_file =  'selenium_session_details.py'
+    with open(session_file, 'w') as file:
+        file.write(session_script)
+
+    print(f"Session details saved to: {session_file}")
diff --git a/sel/selenium_session_details.py b/sel/selenium_session_details.py
@@ -0,0 +1,2 @@
+# Same two names that sel/selenium_browser.py writes to this file name when it
+# launches a browser; sel/selenium_tester.py imports them to attach to it.
+url = 'http://localhost:57072'
+session_id = "4dcc81cc2c4fc962e6a0dc38882092cf"
diff --git a/sel/selenium_tester.py b/sel/selenium_tester.py
@@ -0,0 +1,34 @@
+from selenium import webdriver
+
+from selenium.webdriver.remote import remote_connection
+from selenium.webdriver.remote.command import Command
+class SessionRemote(webdriver.Remote):
+    """Remote WebDriver whose ``start_session`` does not open a new session.
+
+    The override sends no command; ``create_driver`` in this module assigns
+    the ``session_id`` of an existing session after construction.
+    """
+
+    # NOTE(review): presumably shadows the base class's ``name`` so it does
+    # not depend on negotiated capabilities — confirm.
+    name = 'chrome'
+
+    def start_session(self, desired_capabilities, browser_profile=None):
+        """Skip session creation and only mark the driver as W3C."""
+        # Store the flag on the instance; a bare local would be discarded on
+        # return and leave the override without any effect.
+        self.w3c = True
+
+
+def create_driver(url, session_id):
+    """Build a ``SessionRemote`` bound to an existing browser session.
+
+    Args:
+        url: Address handed to the ``RemoteConnection``.
+        session_id: Identifier assigned to the returned driver.
+
+    Returns:
+        The ``SessionRemote`` instance with ``session_id`` set.
+    """
+    connection = remote_connection.RemoteConnection(url)
+    # Register the file-upload endpoint in this connection's command table.
+    connection._commands[Command.UPLOAD_FILE] = ("POST", "/session/$sessionId/file")
+    attached = SessionRemote(
+        command_executor=connection,
+        options=webdriver.ChromeOptions(),
+    )
+    attached.session_id = session_id
+    return attached
+
+# Address and session id of the browser to attach to, read from the
+# sel.selenium_session_details module.
+from sel.selenium_session_details import url,session_id
+# Module-level driver, created as a side effect of importing this module;
+# other modules import `driver` from here.
+driver = create_driver(url,session_id)  
+# Manual smoke test: print the current URL and the href of the paper link on
+# whatever page the attached browser currently shows.
+if __name__ == '__main__':
+    print(driver.current_url)
+    s = '[data-test-id="icon-disclosure"]'
+    # click on the element (the click itself is currently commented out)
+    from selenium.webdriver.common.by import By
+    # driver.find_element(By.CSS_SELECTOR, s).click()
+    s='[data-test-id="paper-link"]'
+    link = driver.find_element(By.CSS_SELECTOR, s).get_attribute('href')
+    print(link)
+
+
diff --git a/sel/start_selenium.cmd b/sel/start_selenium.cmd
@@ -0,0 +1,2 @@
+@echo off
+python -i sel/selenium_browser.py
diff --git a/sel/start_selenium.sh b/sel/start_selenium.sh
@@ -0,0 +1 @@
+python3.12 -i sel/selenium_browser.py
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		url = 'http://localhost:57072'
		session_id = "4dcc81cc2c4fc962e6a0dc38882092cf"