diff --git a/crawler_news/settings.py b/crawler_news/settings.py index 8717547..f8f8920 100644 --- a/crawler_news/settings.py +++ b/crawler_news/settings.py @@ -65,7 +65,7 @@ # Configure item pipelines # See https://docs.scrapy.org/en/latest/topics/item-pipeline.html ITEM_PIPELINES = { - 'crawler_news.pipelines.CassandraPipeline': 300, + 'crawler_news.pipelines.CassandraPipeline': 300, } # Enable and configure the AutoThrottle extension (disabled by default) diff --git a/crawler_news/spiders/LibertyTimes.py b/crawler_news/spiders/LibertyTimes.py index b0f8d6d..d626b96 100644 --- a/crawler_news/spiders/LibertyTimes.py +++ b/crawler_news/spiders/LibertyTimes.py @@ -65,11 +65,11 @@ def _parse_publish_date(self, response): def _parse_authors(self, response): if re.match('https://sports', response.url): - return [response.css('article *::text').re_first(r'記者.*報導',defult='')] + return [response.css('article *::text').re_first(r'記者.*報導',default='')] elif re.match('https://partners', response.url): - return [response.css('article span::text').re_first(r'[0-9-]+ [0-9:]+',defult='')] + return [response.css('article span::text').re_first(r'[0-9-]+ [0-9:]+',default='')] else: - return [response.css('div.text>p *::text').re_first(r'記者.*報導',defult='')] + return [response.css('div.text>p *::text').re_first(r'記者.*報導',default='')] def _parse_tags(self, response): # no tags diff --git a/crawler_news/spiders/ettoday.py b/crawler_news/spiders/ettoday.py index 09471f2..dc7c575 100644 --- a/crawler_news/spiders/ettoday.py +++ b/crawler_news/spiders/ettoday.py @@ -59,7 +59,7 @@ def _parse_publish_date(self, response): return response.css('time.date::text').get().strip() def _parse_authors(self, response): - return [response.css('div.story>p *::text').re_first(r'記者.*報導', defult='')] + return [response.css('div.story>p *::text').re_first(r'記者.*報導', default='')] def _parse_tags(self, response): news_tags = []