Skip to content

Commit

Permalink
A few items
Browse files (browse the repository at this point in the history)
  • Loading branch information
jakep-allenai committed Nov 12, 2024
1 parent 4f2f4fd commit 691cc5a
Showing 1 changed file with 8 additions and 9 deletions.
17 changes: 8 additions & 9 deletions pdelfin/beakerpipeline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -64,8 +64,8 @@ class PageResult:
page_num: int
response: PageResponse

- total_input_tokens: int
- total_output_tokens: int
+ input_tokens: int
+ output_tokens: int


async def build_page_query(local_pdf_path: str, page: int, target_longest_image_dim: int, target_anchor_text_len: int, image_rotation: int=0) -> dict:
Expand Down Expand Up @@ -247,8 +247,8 @@ async def process_page(args, session: aiohttp.ClientSession, pdf_s3_path: str, p
pdf_s3_path,
page_num,
page_response,
- total_input_tokens=base_response_data["usage"].get("prompt_tokens", 0),
- total_output_tokens=base_response_data["usage"].get("completion_tokens", 0)
+ input_tokens=base_response_data["usage"].get("prompt_tokens", 0),
+ output_tokens=base_response_data["usage"].get("completion_tokens", 0)
)
except aiohttp.ClientError as e:
logger.warning(f"Client error on attempt {attempt} for {pdf_s3_path}-{page_num}:: {e}")
Expand Down Expand Up @@ -312,8 +312,8 @@ async def process_pdf(args, pdf_s3_path: str):
metadata = {
"Source-File": pdf_s3_path,
"pdf-total-pages": num_pages,
- "total-input-tokens": sum(page.total_input_tokens for page in page_results),
- "total-output-tokens": sum(page.total_output_tokens for page in page_results)
+ "total-input-tokens": sum(page.input_tokens for page in page_results),
+ "total-output-tokens": sum(page.output_tokens for page in page_results)
}

id_ = hashlib.sha1(document_text.encode()).hexdigest()
Expand Down Expand Up @@ -411,11 +411,10 @@ def _kill_proc():
last_queue_req = None # To track transitions
async def process_line(line):
# Parse the line and update semaphore if necessary
- match = re.search(r'#running-req: (\d+), #queue-req: (\d+)', line)
+ match = re.search(r'#queue-req: (\d+)', line)
if match:
logger.info(line)
- running_req = int(match.group(1))
- queue_req = int(match.group(2))
+ queue_req = int(match.group(1))

nonlocal last_queue_req
if last_queue_req is not None and last_queue_req != 0 and queue_req == 0:
Expand Down

0 comments on commit 691cc5a

Please sign in to comment.