Skip to content

Commit

Permalink
Removing (deprecation) warnings (#530)
Browse files Browse the repository at this point in the history
1. A few warnings need a fix in FARM
2. The warning from the docx library can't be removed.
  • Loading branch information
lalitpagaria authored Nov 2, 2020
1 parent f541916 commit 5d45992
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 8 deletions.
2 changes: 1 addition & 1 deletion haystack/document_store/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ def write_labels(self, labels: Union[List[Label], List[dict]], index: Optional[s

def update_document_meta(self, id: str, meta: Dict[str, str]):
    """Partially update the metadata of an existing document.

    :param id: Elasticsearch document id of the document to update.
    :param meta: Metadata key/value pairs to merge into the stored document
                 (sent as a partial-document update, not a full replace).
    """
    body = {"doc": meta}
    # NOTE: `doc_type` is deprecated since Elasticsearch 7.x, so it is
    # intentionally omitted from the update call to avoid the warning.
    self.client.update(index=self.index, id=id, body=body, refresh=self.refresh_type)

def get_document_count(self, filters: Optional[Dict[str, List[str]]] = None, index: Optional[str] = None) -> int:
index = index or self.index
Expand Down
8 changes: 5 additions & 3 deletions haystack/preprocessor/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,12 +196,14 @@ def tika_convert_files_to_dicts(
last_para = ''
for para in paras:
para = para.strip()
if not para: continue
if not para:
continue
# merge paragraphs to improve qa
# merge this paragraph if less than 10 characters or 2 words
# or this paragraph starts with a lower case and last paragraph does not end with a punctuation
if merge_short and len(para) < 10 or len(re.findall('\s+', para)) < 2 \
or merge_lowercase and para and para[0].islower() and last_para and last_para[-1] not in '.?!"\'\]\)':
if merge_short and len(para) < 10 or len(re.findall(r'\s+', para)) < 2 \
or merge_lowercase and para and para[0].islower() and last_para \
and last_para[-1] not in r'.?!"\'\]\)':
last_para += ' ' + para
else:
if last_para:
Expand Down
1 change: 1 addition & 0 deletions haystack/reader/farm.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ def predict(self, question: str, documents: List[Document], top_k: Optional[int]
inputs.append(cur)

# get answers from QA model
# TODO: Need fix in FARM's `to_dict` function of `QAInput` class
predictions = self.inferencer.inference_from_objects(
objects=inputs, return_json=False, multiprocessing_chunksize=1
)
Expand Down
6 changes: 2 additions & 4 deletions haystack/retriever/dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,9 @@
from pathlib import Path
from tqdm import tqdm

from farm.infer import Inferencer

from haystack.document_store.base import BaseDocumentStore
from haystack import Document
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
from haystack.retriever.base import BaseRetriever
from haystack.retriever.sparse import logger

from farm.infer import Inferencer
from farm.modeling.tokenization import Tokenizer
Expand Down Expand Up @@ -374,6 +370,8 @@ def embed(self, texts: Union[List[str], str]) -> List[np.array]:
assert type(texts) == list, "Expecting a list of texts, i.e. create_embeddings(texts=['text1',...])"

if self.model_format == "farm" or self.model_format == "transformers":
# TODO: FARM's `sample_to_features_text` need to fix following warning -
# tokenization_utils.py:460: FutureWarning: `is_pretokenized` is deprecated and will be removed in a future version, use `is_split_into_words` instead.
emb = self.embedding_model.inference_from_dicts(dicts=[{"text": t} for t in texts]) # type: ignore
emb = [(r["vec"]) for r in emb]
elif self.model_format == "sentence_transformers":
Expand Down

0 comments on commit 5d45992

Please sign in to comment.