Skip to content

Commit

Permalink
Logging fallback pages
Browse files Browse the repository at this point in the history
  • Loading branch information
jakep-allenai committed Nov 19, 2024
1 parent b0acfa8 commit 273a8b0
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
3 changes: 2 additions & 1 deletion pdelfin/beakerpipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,8 @@ def build_dolma_document(pdf_s3_path, page_results):
  "Source-File": pdf_s3_path,
  "pdf-total-pages": len(page_results),
  "total-input-tokens": sum(page.input_tokens for page in page_results),
- "total-output-tokens": sum(page.output_tokens for page in page_results)
+ "total-output-tokens": sum(page.output_tokens for page in page_results),
+ "total-fallback-pages": sum(page.is_fallback for page in page_results),
  }

id_ = hashlib.sha1(document_text.encode()).hexdigest()
Expand Down
2 changes: 1 addition & 1 deletion pdelfin/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
_MINOR = "1"
# On main and in a nightly release the patch should be one ahead of the last
# released build.
- _PATCH = "35"
+ _PATCH = "36"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = ""
Expand Down

0 comments on commit 273a8b0

Please sign in to comment.