forked from divkakwani/webcorpus
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathsetup.py
123 lines (104 loc) · 4.77 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""A setuptools based setup module.
See:
https://packaging.python.org/guides/distributing-packages-using-setuptools/
/~https://github.com/pypa/sampleproject
"""
from setuptools import setup, find_packages
from os import path
here = path.abspath(path.dirname(__file__))
# Get the long description from the README file
with open(path.join(here, 'readme.md'), encoding='utf-8') as f:
long_description = f.read()
setup(
name='webcorpus',
version='1.0',
description='Generate large textual corpora for almost any language by crawling the web',
long_description=long_description,
long_description_content_type='text/markdown',
url='/~https://github.com/divkakwani/webcorpus',
author='Divyanshu Kakwani',
author_email='divkakwani@gmail.com',
classifiers=[ # Optional
# How mature is this project? Common values are
# 3 - Alpha
# 4 - Beta
# 5 - Production/Stable
'Development Status :: 4 - Beta',
# Indicate who your project is intended for
'Intended Audience :: Developers',
# Pick your license as you wish
'License :: OSI Approved :: MIT License',
# Specify the Python versions you support here. In particular, ensure
# that you indicate whether you support Python 2, Python 3 or both.
# These classifiers are *not* checked by 'pip install'. See instead
# 'python_requires' below.
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
],
# This field adds keywords for your project which will appear on the
# project page. What does your project relate to?
#
# Note that this is a string of words separated by whitespace, not a list.
keywords='dataset corpus', # Optional
# You can just specify package directories manually here if your project is
# simple. Or you can use find_packages().
#
# Alternatively, if you just want to distribute a single Python file, use
# the `py_modules` argument instead as follows, which will expect a file
# called `my_module.py` to exist:
#
# py_modules=["my_module"],
#
packages=find_packages(), # Required
# Specify which Python versions you support. In contrast to the
# 'Programming Language' classifiers above, 'pip install' will check this
# and refuse to install the project if the version does not match. If you
# do not support Python 2, you can simplify this to '>=3.5' or similar, see
# https://packaging.python.org/guides/distributing-packages-using-setuptools/#python-requires
python_requires='>=3.5',
# This field lists other packages that your project depends on to run.
# Any package you put here will be installed by pip when your project is
# installed, so they must be valid existing projects.
#
# For an analysis of "install_requires" vs pip's requirements files see:
# https://packaging.python.org/en/latest/requirements.html
install_requires=['morfessor', 'boilerpipe3', 'tldextract', 'click',
'scrapy', 'tqdm', 'pandas', 'scrapyd', 'nltk', 'scrapyd-client'], # Optional
# If there are data files included in your packages that need to be
# installed, specify them here.
#
# If using Python 2.6 or earlier, then these have to be included in
# MANIFEST.in as well.
include_package_data=True,
# package_data={ # Optional
# 'corpora': ['sources/*.csv', 'vendor/**/*'],
# },
# To provide executable scripts, use entry points in preference to the
# "scripts" keyword. Entry points provide cross-platform support and allow
# `pip` to create the appropriate form of executable for the target
# platform.
#
# For example, the following would provide a command called `sample` which
# executes the function `main` from this package when invoked:
entry_points={ # Optional
'console_scripts': [
'webcorpus=webcorpus:cli',
],
'scrapy': ['settings = webcorpus.crawlers.settings'],
},
# List additional URLs that are relevant to your project as a dict.
#
# This field corresponds to the "Project-URL" metadata fields:
# https://packaging.python.org/specifications/core-metadata/#project-url-multiple-use
#
# Examples listed include a pattern for specifying where the package tracks
# issues, where the source is hosted, where to say thanks to the package
# maintainers, and where to support the project financially. The key is
# what's used to render the link text on PyPI.
project_urls={ # Optional
'Bug Reports': '/~https://github.com/divkakwani/webcorpus/issues',
'Source': '/~https://github.com/divkakwani/webcorpus',
},
)