From cfa2c8a5fc6a5ab18f99af36b61e3621b48b542c Mon Sep 17 00:00:00 2001 From: Jack Morris Date: Fri, 7 Aug 2020 11:25:14 -0400 Subject: [PATCH 1/2] add mirror for RT dataset --- datasets/rotten_tomatoes/rotten_tomatoes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/rotten_tomatoes/rotten_tomatoes.py b/datasets/rotten_tomatoes/rotten_tomatoes.py index b2f173866e4..c89dea34fe1 100644 --- a/datasets/rotten_tomatoes/rotten_tomatoes.py +++ b/datasets/rotten_tomatoes/rotten_tomatoes.py @@ -42,7 +42,7 @@ } """ -_DOWNLOAD_URL = "http://www.cs.cornell.edu/people/pabo/movie-review-data/rt-polaritydata.tar.gz" +_DOWNLOAD_URL = "https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz" class RottenTomatoesMovieReview(nlp.GeneratorBasedBuilder): From 43dd351a3c3fc2bac8cd7d7378fb1ea3bab12e0c Mon Sep 17 00:00:00 2001 From: Jack Morris Date: Tue, 11 Aug 2020 14:11:19 -0400 Subject: [PATCH 2/2] update dataset_infos for mirror --- datasets/rotten_tomatoes/dataset_infos.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/rotten_tomatoes/dataset_infos.json b/datasets/rotten_tomatoes/dataset_infos.json index 3b292afdc5d..c1c5c7da76c 100644 --- a/datasets/rotten_tomatoes/dataset_infos.json +++ b/datasets/rotten_tomatoes/dataset_infos.json @@ -1 +1 @@ -{"default": {"description": "Movie Review Dataset.\nThis is a dataset of containing 5,331 positive and 5,331 negative processed \nsentences from Rotten Tomatoes movie reviews. This data was first used in Bo \nPang and Lillian Lee, ``Seeing stars: Exploiting class relationships for \nsentiment categorization with respect to rating scales.'', Proceedings of the \nACL, 2005.\n", "citation": "@InProceedings{Pang+Lee:05a,\n author = {Bo Pang and Lillian Lee},\n title = {Seeing stars: Exploiting class relationships for sentiment\n categorization with respect to rating scales},\n booktitle = {Proceedings of the ACL},\n year = 2005\n}\n", "homepage": "http://www.cs.cornell.edu/people/pabo/movie-review-data/", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["neg", "pos"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "supervised_keys": {"input": "", "output": ""}, "builder_name": "rotten_tomatoes_movie_review", "config_name": "default", "version": {"version_str": "1.0.0", "description": null, "nlp_version_to_prepare": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1074810, "num_examples": 8530, "dataset_name": "rotten_tomatoes_movie_review"}, "validation": {"name": "validation", "num_bytes": 134679, "num_examples": 1066, "dataset_name": "rotten_tomatoes_movie_review"}, "test": {"name": "test", "num_bytes": 135972, "num_examples": 1066, "dataset_name": "rotten_tomatoes_movie_review"}}, "download_checksums": {"http://www.cs.cornell.edu/people/pabo/movie-review-data/rt-polaritydata.tar.gz": {"num_bytes": 487770, "checksum": "a05befe52aafda71d458d188a1c54506a998b1308613ba76bbda2e5029409ce9"}}, "download_size": 487770, "dataset_size": 1345461, "size_in_bytes": 1833231}} \ No newline at end of file +{"default": {"description": "Movie Review Dataset.\nThis is a dataset of containing 5,331 positive and 5,331 negative processed \nsentences from Rotten Tomatoes movie reviews. This data was first used in Bo \nPang and Lillian Lee, ``Seeing stars: Exploiting class relationships for \nsentiment categorization with respect to rating scales.'', Proceedings of the \nACL, 2005.\n", "citation": "@InProceedings{Pang+Lee:05a,\n author = {Bo Pang and Lillian Lee},\n title = {Seeing stars: Exploiting class relationships for sentiment\n categorization with respect to rating scales},\n booktitle = {Proceedings of the ACL},\n year = 2005\n}\n", "homepage": "http://www.cs.cornell.edu/people/pabo/movie-review-data/", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["neg", "pos"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "supervised_keys": {"input": "", "output": ""}, "builder_name": "rotten_tomatoes_movie_review", "config_name": "default", "version": {"version_str": "1.0.0", "description": null, "nlp_version_to_prepare": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1074810, "num_examples": 8530, "dataset_name": "rotten_tomatoes_movie_review"}, "validation": {"name": "validation", "num_bytes": 134679, "num_examples": 1066, "dataset_name": "rotten_tomatoes_movie_review"}, "test": {"name": "test", "num_bytes": 135972, "num_examples": 1066, "dataset_name": "rotten_tomatoes_movie_review"}}, "download_checksums": {"https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz": {"num_bytes": 487770, "checksum": "a05befe52aafda71d458d188a1c54506a998b1308613ba76bbda2e5029409ce9"}}, "download_size": 487770, "dataset_size": 1345461, "size_in_bytes": 1833231}} \ No newline at end of file