diff --git a/haystack/document_stores/elasticsearch.py b/haystack/document_stores/elasticsearch.py index fb9f0bbcbd..c4d67d2b38 100644 --- a/haystack/document_stores/elasticsearch.py +++ b/haystack/document_stores/elasticsearch.py @@ -281,7 +281,7 @@ def _create_document_index(self, index_name: str, headers: Optional[Dict[str, st "mappings": { "properties": { self.name_field: {"type": "keyword"}, - self.content_field: {"type": "text"}, + self.content_field: {"type": "text"} }, "dynamic_templates": [ { @@ -301,13 +301,21 @@ def _create_document_index(self, index_name: str, headers: Optional[Dict[str, st } } } + if self.synonyms: + for field in self.search_fields: + mapping["mappings"]["properties"].update({field: {"type": "text", "analyzer": "synonym"}}) mapping["mappings"]["properties"][self.content_field] = {"type": "text", "analyzer": "synonym"} + mapping["settings"]["analysis"]["analyzer"]["synonym"] = {"tokenizer": "whitespace", "filter": ["lowercase", "synonym"]} mapping["settings"]["analysis"]["filter"] = {"synonym": {"type": self.synonym_type, "synonyms": self.synonyms}} + else: + for field in self.search_fields: + mapping["mappings"]["properties"].update({field: {"type": "text"}}) + if self.embedding_field: mapping["mappings"]["properties"][self.embedding_field] = {"type": "dense_vector", "dims": self.embedding_dim} @@ -1353,7 +1361,7 @@ def _create_document_index(self, index_name: str, headers: Optional[Dict[str, st "mappings": { "properties": { self.name_field: {"type": "keyword"}, - self.content_field: {"type": "text"}, + self.content_field: {"type": "text"} }, "dynamic_templates": [ { @@ -1373,6 +1381,21 @@ def _create_document_index(self, index_name: str, headers: Optional[Dict[str, st } } } + + if self.synonyms: + for field in self.search_fields: + mapping["mappings"]["properties"].update({field: {"type": "text", "analyzer": "synonym"}}) + mapping["mappings"]["properties"][self.content_field] = {"type": "text", "analyzer": "synonym"} + + mapping["settings"]["analysis"]["analyzer"]["synonym"] = {"tokenizer": "whitespace", + "filter": ["lowercase", + "synonym"]} + mapping["settings"]["analysis"]["filter"] = {"synonym": {"type": self.synonym_type, "synonyms": self.synonyms}} + + else: + for field in self.search_fields: + mapping["mappings"]["properties"].update({field: {"type": "text"}}) + if self.embedding_field: if self.similarity == "cosine": diff --git a/test/test_document_store.py b/test/test_document_store.py index 887e245e57..bc203291bf 100644 --- a/test/test_document_store.py +++ b/test/test_document_store.py @@ -1190,4 +1190,30 @@ def test_DeepsetCloudDocumentStore_query_by_embedding(deepset_cloud_document_sto ) emb_docs = deepset_cloud_document_store.query_by_embedding(query_emb) - assert len(emb_docs) == 0 \ No newline at end of file + assert len(emb_docs) == 0 + + +@pytest.mark.elasticsearch +def test_elasticsearch_search_field_mapping(): + + client = Elasticsearch() + client.indices.delete(index='haystack_search_field_mapping', ignore=[404]) + + index_data = [ + {"title": "Green tea components", + "meta": {"content": "The green tea plant contains a range of healthy compounds that make it into the final drink","sub_content":"Drink tip"},"id": "1"}, + {"title": "Green tea catechin", + "meta": {"content": "Green tea contains a catechin called epigallocatechin-3-gallate (EGCG).","sub_content":"Ingredients tip"}, "id": "2"}, + {"title": "Minerals in Green tea", + "meta": {"content": "Green tea also has small amounts of minerals that can benefit your health.","sub_content":"Minerals tip"}, "id": "3"}, + {"title": "Green tea Benefits", + "meta": {"content": "Green tea does more than just keep you alert, it may also help boost brain function.","sub_content":"Health tip"},"id": "4"} + ] + + document_store = ElasticsearchDocumentStore(index="haystack_search_field_mapping",search_fields=["content", "sub_content"],content_field= "title") + document_store.write_documents(index_data) + + indexed_settings = client.indices.get_mapping(index="haystack_search_field_mapping") + + assert indexed_settings["haystack_search_field_mapping"]["mappings"]["properties"]["content"]["type"] == 'text' + assert indexed_settings["haystack_search_field_mapping"]["mappings"]["properties"]["sub_content"]["type"] == 'text' \ No newline at end of file