Skip to content

Commit

Permalink
updated .gitignore, added mega.py, rm pdfs, cleanups
Browse files Browse the repository at this point in the history
Signed-off-by: wiseaidev <business@wiseai.dev>
  • Loading branch information
wiseaidev committed Oct 3, 2022
1 parent fc13fdc commit bdd8a99
Show file tree
Hide file tree
Showing 14 changed files with 126 additions and 97 deletions.
107 changes: 51 additions & 56 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,47 +1,12 @@

Skip to content
Pull requests
Issues
Marketplace
Explore
@marshalmiller
marshalmiller /
linkrot-dev


0
0

96

Code
Issues 1
Pull requests
Actions
Projects
Wiki
Security
Insights

Settings

linkrot-dev/.gitignore
@marshalmiller
marshalmiller Update .gitignore
Latest commit 63280e5 8 days ago
History
@marshalmiller
62 lines (52 sloc) 733 Bytes
#Mac
.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
.*_cache

*$py.class
.idea/
# C extensions
*.so
# pdf files
*.pdf

# Distribution / packaging
.Python
Expand All @@ -57,10 +22,10 @@ lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
venv/

# PyInstaller
# Usually these files are written by a python script from a template
Expand All @@ -80,33 +45,63 @@ htmlcov/
.cache
nosetests.xml
coverage.xml
*,cover
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

© 2021 GitHub, Inc.
Terms
Privacy
Security
Status
Docs

Contact GitHub
Pricing
API
Training
Blog
About

Loading complete
# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# IDE settings
.vscode/
34 changes: 18 additions & 16 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@ readme = "README.md"
license = "MIT"
requires-python = ">=3.7"
authors = [
{ name = "Marshal Miller", email = "marshal@marshalmiller.com" },
{ name = "Marshal Miller", email = "marshal@marshalmiller.com" },
]
keywords = [
"pdf",
"reference",
"linkrot",
"pdf",
"reference",
"linkrot",
]
classifiers = [
"Development Status :: 5 - Production/Stable",
"Environment :: Console",
"License :: OSI Approved :: Apache Software License",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
Expand All @@ -38,17 +38,19 @@ dependencies = [

[project.optional-dependencies]
dev = [
"black==22.8.0",
"coverage==6.5.0",
"flake8==5.0.4",
"mccabe<=0.7.0",
"mypy==0.981",
"pylint==2.15.3",
"pytest==7.1.3",
"build==0.8.0",
"black==22.8.0",
"coverage==6.5.0",
"flake8==5.0.4",
"mccabe<=0.7.0",
"mypy==0.981",
"pylint==2.15.3",
"pytest==7.1.3",
"build==0.8.0",
"mega.py==1.0.8",
]
test = [
"pytest==7.1.3",
"pytest==7.1.3",
"mega.py==1.0.8",
]

[project.urls]
Expand All @@ -59,12 +61,12 @@ path = "linkrot/__init__.py"

[tool.hatch.build.targets.sdist]
include = [
"/linkrot",
"/linkrot",
]

[tool.hatch.build.targets.wheel]
include = [
"/linkrot",
"/linkrot",
]

[project.scripts]
Expand Down
9 changes: 9 additions & 0 deletions tests/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Test PDFs used by the suite: maps each PDF file name to the mega.nz link
# where that file is hosted.
PDFS = {
"email_test_single_page.pdf": "https://mega.nz/file/akRAXJhT#u5hWvL4JJmoz8JUQdJw4Nfnzw-WnXc2qwCAcVZwbLcM",
"embedded_link_image.pdf": "https://mega.nz/file/nsBgDSxT#dZIjpEb437SLxH-26P9V8cSNwVc9kTpqLFCP9x6CEeg",
"embedded_link_testcase.pdf": "https://mega.nz/file/C5gGTJJR#GUbraILCDX1j4QnVLEbtoKkzxBbJqN-AUBNWCXPFRS8",
"i14doc1.pdf": "https://mega.nz/file/u1owmDZa#xqeMALTWhA5UVyT7PRbevRzXmPU8pgBbjN0vzt6aRgo",
"i14doc2.pdf": "https://mega.nz/file/ehRwSBbC#BqaIZd4XKhdtmRguibffiTX8tA39m6cTSDx-OFFKQqs",
"invalid.pdf": "https://mega.nz/file/Hs4VHAII#dd9F6Uonk6c4tVnSO6VyeqQEkXh65EhMq6sT8zlIowY",
"valid.pdf": "https://mega.nz/file/Xp5RkbaI#3pRFF-kTQFh-ZwOs0O6JqqNdXhsjLuBNxZB2NDgl8Mc",
}
19 changes: 19 additions & 0 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import pytest
from mega import Mega
import shutil
import os
from constants import PDFS


@pytest.fixture(scope="module")
def download_pdfs():
    """Download the test PDFs into ``./tests/pdfs`` and remove them afterwards.

    Setup creates the directory and downloads every URL listed in ``PDFS``
    into it; teardown deletes the directory tree again.

    Yields:
        None. Tests request this fixture only for its side effect of
        populating ``./tests/pdfs``.
    """
    pdf_dir = "./tests/pdfs"
    # exist_ok: a directory left over from an interrupted run must not make
    # every later run fail with FileExistsError.
    os.makedirs(pdf_dir, exist_ok=True)
    try:
        # login() without credentials -- presumably an anonymous mega.nz
        # session; confirm against the mega.py documentation.
        client = Mega().login()
        for url in PDFS.values():
            client.download_url(url, pdf_dir)
        yield
    finally:
        # Runs even when login or a download raises during setup, so a
        # half-populated directory is never left behind.
        shutil.rmtree(pdf_dir)
Binary file removed tests/pdfs/email_test_single_page.pdf
Binary file not shown.
Binary file removed tests/pdfs/embedded_link_image.pdf
Binary file not shown.
Binary file removed tests/pdfs/embedded_link_testcase.pdf
Binary file not shown.
Binary file removed tests/pdfs/i14doc1.pdf
Binary file not shown.
Binary file removed tests/pdfs/i14doc2.pdf
Binary file not shown.
Binary file removed tests/pdfs/invalid.pdf
Binary file not shown.
Binary file removed tests/pdfs/valid.pdf
Binary file not shown.
7 changes: 2 additions & 5 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
import os
from linkrot import cli
# import pytest

curdir = os.path.dirname(os.path.realpath(__file__))


def test_cli():
parser = cli.create_parser()
parsed = parser.parse_args(['-j', 'pdfs/valid.pdf'])
parsed = parser.parse_args(["-j", "./tests/pdfs/valid.pdf"])
assert parsed.json
assert parsed.pdf == "pdfs/valid.pdf"
assert parsed.pdf == "./tests/pdfs/valid.pdf"
5 changes: 4 additions & 1 deletion tests/test_downloader.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
import os
import pytest

from linkrot.downloader import sanitize_url


def test_should_not_add_http_to_upper_case_url():
    """An upper-case URL that already has a scheme must come back unchanged."""
    assert (
        sanitize_url("HTTP://WWW.TRACFONE.COM/TERMSANDCONDITIONS#RETURNPOLICY")
        == "HTTP://WWW.TRACFONE.COM/TERMSANDCONDITIONS#RETURNPOLICY"
    )


def test_should_add_http_to_upper_case_url():
    """An upper-case URL without a scheme must gain an ``http://`` prefix."""
    assert (
        sanitize_url("WWW.TRACFONE.COM/TERMSANDCONDITIONS#RETURNPOLICY")
        == "http://WWW.TRACFONE.COM/TERMSANDCONDITIONS#RETURNPOLICY"
    )


def test_should_not_add_http_to_lower_case_url():
    """A lower-case URL that already has a scheme must come back unchanged."""
    sanitized = sanitize_url("http://google.com")
    assert sanitized == "http://google.com"


def test_should_add_http_to_lower_case_url():
result = sanitize_url("google.com")
expected = "http://google.com"
Expand Down
42 changes: 23 additions & 19 deletions tests/test_linkrot.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,56 @@

import os
import linkrot
from linkrot.exceptions import FileNotFoundError, DownloadError, PDFInvalidError
from linkrot import linkrot
from fixtures import download_pdfs
import pytest

curdir = os.path.dirname(os.path.realpath(__file__))

def test_all():
with pytest.raises(linkrot.exceptions.FileNotFoundError):
linkrot.linkrot("asd")

with pytest.raises(linkrot.exceptions.DownloadError):
linkrot.linkrot("http://invalid.com/404.pdf")
@pytest.mark.parametrize(
"test_case, expected",
[
("asd", FileNotFoundError),
("http://invalid.com/404.pdf", DownloadError),
("./tests/pdfs/invalid.pdf", PDFInvalidError),
],
)
def test_linkrot_exceptions(download_pdfs, test_case, expected):
with pytest.raises(expected):
linkrot(test_case)

with pytest.raises(linkrot.exceptions.PDFInvalidError):
linkrot.linkrot(os.path.join(curdir, "pdfs/invalid.pdf"))

pdf = linkrot.linkrot(os.path.join(curdir, "pdfs/valid.pdf"))
def test_valid_pdf():
pdf = linkrot("./tests/pdfs/valid.pdf")
urls = pdf.get_references(reftype="pdf")
assert len(urls) == 18
# pdf.download_pdfs("/tmp/")


def test_two_pdfs():
linkrot.linkrot(os.path.join(curdir, "pdfs/i14doc1.pdf"))
pdf_2 = linkrot.linkrot(os.path.join(curdir, "pdfs/i14doc2.pdf"))
linkrot("./tests/pdfs/i14doc1.pdf")
pdf_2 = linkrot("./tests/pdfs/i14doc2.pdf")
assert len(pdf_2.get_references()) == 2


def test_pdf_with_email_address():
pdf_with_email_addresses = linkrot.linkrot(os.path.join(curdir, "pdfs/email_test_single_page.pdf"))
pdf_with_email_addresses = linkrot("./tests/pdfs/email_test_single_page.pdf")
references = pdf_with_email_addresses.get_references()
# there are only 2 email references in the pdf that should be excluded
assert len(references) == 0


def test_pdf_with_embedded_links():
pdf_with_embedded_links = linkrot.linkrot(os.path.join(curdir, "pdfs/embedded_link_testcase.pdf"))
pdf_with_embedded_links = linkrot("./tests/pdfs/embedded_link_testcase.pdf")
references = pdf_with_embedded_links.get_references()

assert len(references) == 7


def test_pdf_with_embedded_link_in_image():
pdf_with_embedded_link_in_image = linkrot.linkrot(os.path.join(curdir, "pdfs/embedded_link_image.pdf"))
pdf_with_embedded_link_in_image = linkrot("./tests/pdfs/embedded_link_image.pdf")
references = pdf_with_embedded_link_in_image.get_references()
# assert that the reference was found
assert len(references) == 1
# get the reference from the set
image_ref = references.pop()

EMBEDDED_LINK_IN_IMAGE = "/~https://github.com/marshalmiller/linkrot/blob/6e6fb45239f8d06e89671e2ec68a11629747355d/branding/Asset%207@4x.png"
assert image_ref.ref == EMBEDDED_LINK_IN_IMAGE
assert image_ref.ref == EMBEDDED_LINK_IN_IMAGE

0 comments on commit bdd8a99

Please sign in to comment.