From b86795c3559d0076a05ac048b8c5c571efbae2ea Mon Sep 17 00:00:00 2001
From: Simon Jenkins
Date: Mon, 1 Aug 2022 15:57:10 -0700
Subject: [PATCH 1/2] Initial service commit

---
 .gitignore                                   |   4 +
 README.md                                    |  36 +++
 api.py                                       |  45 ++++
 docker-compose.yml                           | 144 +++++++++++
 models/__init__.py                           |   0
 models/product.py                            | 241 +++++++++++++++++++
 models/request.py                            |  46 ++++
 requirements.txt                             |  24 ++
 scripts/data-fields.txt                      | 179 ++++++++++++++
 scripts/es_query.py                          |  24 ++
 scripts/generate_product_from_data_fields.py |  76 ++++++
 scripts/http_query.py                        |  33 +++
 scripts/perform_import.py                    |  91 +++++++
 utils/__init__.py                            |   0
 utils/analyzers.py                           |   8 +
 utils/connection.py                          |   5 +
 utils/constants.py                           |  59 +++++
 utils/response.py                            |  28 +++
 18 files changed, 1043 insertions(+)
 create mode 100644 api.py
 create mode 100644 docker-compose.yml
 create mode 100644 models/__init__.py
 create mode 100644 models/product.py
 create mode 100644 models/request.py
 create mode 100644 requirements.txt
 create mode 100644 scripts/data-fields.txt
 create mode 100644 scripts/es_query.py
 create mode 100644 scripts/generate_product_from_data_fields.py
 create mode 100644 scripts/http_query.py
 create mode 100644 scripts/perform_import.py
 create mode 100644 utils/__init__.py
 create mode 100644 utils/analyzers.py
 create mode 100644 utils/connection.py
 create mode 100644 utils/constants.py
 create mode 100644 utils/response.py

diff --git a/.gitignore b/.gitignore
index b6e47617..58669e3d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,7 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# PyCharm and macOS
+.idea/
+.DS_Store
\ No newline at end of file
diff --git a/README.md b/README.md
index cca41f6d..799502cf 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,38 @@
 # openfoodfacts-search
 Open Food Facts Search API V3 using ElasticSearch - https://wiki.openfoodfacts.org/Search_API_V3
+
+This API is currently in development. It is not serving any production traffic. The [Work Plan](https://wiki.openfoodfacts.org/Search_API_V3#Work_Plan) will be updated as development continues.
+
+### Organization
+The main file is `api.py`, and the Product schema is in `models/product.py`.
+
+The `scripts/` directory contains various scripts for manual validation, constructing the product schema, importing, etc.
+
+### Running locally
+Docker spins up:
+- Two elasticsearch nodes
+- [Elasticvue](https://elasticvue.com/)
+
+You will then need to import from CSV (see instructions below).
+
+Make sure your environment is configured:
+```commandline
+export ELASTIC_PASSWORD=PASSWORD_HERE
+```
+
+
+### Helpful commands:
+
+To start docker:
+```console
+docker-compose up -d
+```
+
+To start server:
+```console
+uvicorn api:app --reload
+```
+
+To import data from the [CSV export](https://world.openfoodfacts.org/data):
+```console
+python scripts/perform_import.py --filename=/path/to/file.csv
\ No newline at end of file
diff --git a/api.py b/api.py
new file mode 100644
index 00000000..4ad2cb8b
--- /dev/null
+++ b/api.py
@@ -0,0 +1,45 @@
+from elasticsearch_dsl import Q
+from fastapi import FastAPI, HTTPException
+
+from models.product import Product
+from models.request import AutocompleteRequest, SearchRequest
+from utils import connection, constants, response
+
+app = FastAPI()
+connection.get_connection()
+
+
+# TODO: Remove this commented out code, so that it's not confusing about where the current GET API is served
+# (retaining temporarily as a proof of concept)
+# @app.get("/{barcode}")
+# def get_product(barcode: str):
+#     results = Product.search().query("match", code=barcode).execute()
+#     results_dict = [r.to_dict() for r in results]
+#
+#     if not results_dict:
+#         raise HTTPException(status_code=404, detail="Barcode not found")
+#
+#     product = results_dict[0]
+#     return product
+
+@app.post("/autocomplete")
+def autocomplete(request: AutocompleteRequest):
+    # TODO: This function needs unit testing
+    if not request.search_fields:
+        request.search_fields = constants.AUTOCOMPLETE_FIELDS
+    for field in request.search_fields:
+        if field not in constants.AUTOCOMPLETE_FIELDS:
+            raise HTTPException(status_code=400, detail="Invalid field: {}".format(field))
+
+    match_queries = []
+    for field in request.search_fields:
+        match_queries.append(Q('match', **{field: request.text}))
+
+    results = Product.search().query('bool', should=match_queries).extra(size=request.get_num_results()).execute()
+    resp = response.create_response(results, request)
+    return resp
+
+
+@app.post("/search")
+def search(request: SearchRequest):
+    raise NotImplementedError()
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..b9aabc16
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,144 @@
+version: "2.2"
+
+services:
+  setup:
+    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
+    volumes:
+      - certs:/usr/share/elasticsearch/config/certs
+    user: "0"
+    command: >
+      bash -c '
+        if [ x${ELASTIC_PASSWORD} == x ]; then
+          echo "Set the ELASTIC_PASSWORD environment variable in the .env file";
+          exit 1;
+        fi;
+        if [ ! -f config/certs/ca.zip ]; then
+          echo "Creating CA";
+          bin/elasticsearch-certutil ca --silent --pem -out config/certs/ca.zip;
+          unzip config/certs/ca.zip -d config/certs;
+        fi;
+        if [ ! -f config/certs/certs.zip ]; then
+          echo "Creating certs";
+          echo -ne \
+          "instances:\n"\
+          "  - name: es01\n"\
+          "    dns:\n"\
+          "      - es01\n"\
+          "      - localhost\n"\
+          "    ip:\n"\
+          "      - 127.0.0.1\n"\
+          "  - name: es02\n"\
+          "    dns:\n"\
+          "      - es02\n"\
+          "      - localhost\n"\
+          "    ip:\n"\
+          "      - 127.0.0.1\n"\
+          > config/certs/instances.yml;
+          bin/elasticsearch-certutil cert --silent --pem -out config/certs/certs.zip --in config/certs/instances.yml --ca-cert config/certs/ca/ca.crt --ca-key config/certs/ca/ca.key;
+          unzip config/certs/certs.zip -d config/certs;
+        fi;
+        echo "Setting file permissions";
+        chown -R root:root config/certs;
+        find . -type d -exec chmod 750 \{\} \;;
+        find . -type f -exec chmod 640 \{\} \;;
+        echo "Waiting for Elasticsearch availability";
+        until curl -s --cacert config/certs/ca/ca.crt https://es01:9200 | grep -q "missing authentication credentials"; do sleep 30; done;
+        echo "All done!";
+      '
+    healthcheck:
+      test: ["CMD-SHELL", "[ -f config/certs/es01/es01.crt ]"]
+      interval: 1s
+      timeout: 5s
+      retries: 120
+
+  es01:
+    depends_on:
+      setup:
+        condition: service_healthy
+    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
+    volumes:
+      - certs:/usr/share/elasticsearch/config/certs
+      - esdata01:/usr/share/elasticsearch/data
+    ports:
+      - ${ES_PORT}:9200
+    environment:
+      - node.name=es01
+      - cluster.name=${CLUSTER_NAME}
+      - cluster.initial_master_nodes=es01,es02
+      - discovery.seed_hosts=es02
+      - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
+      - bootstrap.memory_lock=true
+      - xpack.security.enabled=false
+      - xpack.license.self_generated.type=${LICENSE}
+      - http.cors.enabled=true
+      - http.cors.allow-origin=http://localhost:8080,http://127.0.0.1:8080
+      - http.cors.allow-headers=X-Requested-With,X-Auth-Token,Content-Type,Content-Length,Authorization
+      - http.cors.allow-credentials=true
+    mem_limit: ${MEM_LIMIT}
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+    healthcheck:
+      test:
+        [
+          "CMD-SHELL",
+          "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'",
+        ]
+      interval: 10s
+      timeout: 10s
+      retries: 120
+
+  es02:
+    depends_on:
+      - es01
+    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
+    volumes:
+      - certs:/usr/share/elasticsearch/config/certs
+      - esdata02:/usr/share/elasticsearch/data
+    environment:
+      - node.name=es02
+      - cluster.name=${CLUSTER_NAME}
+      - cluster.initial_master_nodes=es01,es02
+      - discovery.seed_hosts=es01
+      - bootstrap.memory_lock=true
+      - xpack.security.enabled=false
+      - xpack.license.self_generated.type=${LICENSE}
+      - http.cors.enabled=true
+      - http.cors.allow-origin=http://localhost:8080,http://127.0.0.1:8080
+      - http.cors.allow-headers=X-Requested-With,X-Auth-Token,Content-Type,Content-Length,Authorization
+      - http.cors.allow-credentials=true
+    mem_limit: ${MEM_LIMIT}
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+    healthcheck:
+      test:
+        [
+          "CMD-SHELL",
+          "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'",
+        ]
+      interval: 10s
+      timeout: 10s
+      retries: 120
+
+
+  # elasticsearch browser
+  elasticvue:
+    image: cars10/elasticvue
+    container_name: elasticvue
+    ports:
+      - '8080:8080'
+    links:
+      - es01
+
+volumes:
+  certs:
+    driver: local
+  esdata01:
+    driver: local
+  esdata02:
+    driver: local
+  esdata03:
+    driver: local
\ No newline at end of file
diff --git a/models/__init__.py b/models/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/models/product.py b/models/product.py
new file mode 100644
index 00000000..04087639
--- /dev/null
+++ b/models/product.py
@@ -0,0 +1,241 @@
+from elasticsearch_dsl import Document, Date, Double, Keyword, Text, Integer
+
+from utils import constants
+from utils.analyzers import autocomplete
+
+
+class Product(Document):
+    """
+    This should mirror the fields here: /~https://github.com/openfoodfacts/openfoodfacts-server/blob/main/html/data/data-fields.txt
+    Use scripts/generate_product_from_data_fields.py to regenerate from data-fields.txt, but be careful to preserve
+    manual adjustments
+    """
+
+    class Index:
+        name = constants.INDEX_ALIAS
+        settings = {
+            "number_of_shards": 4,
+        }
+
+    # barcode of the product (can be EAN-13 or internal codes for some food stores), for products without a barcode, Open Food Facts assigns a number starting with the 200 reserved prefix
+    code = Keyword()
+    # url of the product page on Open Food Facts
+    url = Keyword()
+    # contributor who first added the product
+    creator = Keyword()
+    # date that the product was added (UNIX timestamp format)
+    created_t = Integer()
+    # date that the product was added (iso8601 format: yyyy-mm-ddThh:mn:ssZ)
+    created_datetime = Date()
+    # date that the product page was last modified
+    last_modified_t = Integer()
+    last_modified_datetime = Date()
+    # name of the product
+    product_name = Text(analyzer='snowball', fields={'autocomplete': Text(analyzer=autocomplete), 'raw': Keyword()})
+    generic_name = Keyword()
+    # quantity and unit
+    quantity = Keyword()
+    # shape, material
+    packaging = Keyword()
+    packaging_tags = Text(multi=True)
+    brands = Text(analyzer='snowball', fields={'autocomplete': Text(analyzer=autocomplete), 'raw': Keyword()})
+    brands_tags = Text(multi=True)
+    categories = Text(analyzer='snowball', fields={'autocomplete': Text(analyzer=autocomplete), 'raw': Keyword()})
+    categories_tags = Text(multi=True)
+    categories_fr = Keyword()
+    # origins of ingredients
+    origins = Keyword()
+    origins_tags = Text(multi=True)
+    # places where manufactured or transformed
+    manufacturing_places = Keyword()
+    manufacturing_places_tags = Text(multi=True)
+    labels = Keyword()
+    labels_tags = Text(multi=True)
+    labels_fr = Keyword()
+    emb_codes = Keyword()
+    emb_codes_tags = Text(multi=True)
+    # coordinates corresponding to the first packaging code indicated
+    first_packaging_code_geo = Keyword()
+    cities = Keyword()
+    cities_tags = Text(multi=True)
+    purchase_places = Keyword()
+    stores = Keyword()
+    # list of countries where the product is sold
+    countries = Keyword()
+    countries_tags = Text(multi=True)
+    countries_fr = Keyword()
+    ingredients_text = Keyword()
+    traces = Keyword()
+    traces_tags = Text(multi=True)
+    # serving size in g
+    serving_size = Keyword()
+    # indicates if the nutrition facts are indicated on the food label
+    no_nutriments = Keyword()
+    # number of food additives
+    additives_n = Keyword()
+    additives = Keyword()
+    additives_tags = Text(multi=True)
+    ingredients_from_palm_oil_n = Keyword()
+    ingredients_from_palm_oil = Keyword()
+    ingredients_from_palm_oil_tags = Text(multi=True)
+    ingredients_that_may_be_from_palm_oil_n = Keyword()
+    ingredients_that_may_be_from_palm_oil = Keyword()
+    ingredients_that_may_be_from_palm_oil_tags = Text(multi=True)
+    # nutrition grade ('a' to 'e'). see https://fr.openfoodfacts.org/nutriscore
+    nutrition_grade_fr = Keyword()
+    main_category = Keyword()
+    main_category_fr = Keyword()
+    image_url = Keyword()
+    image_small_url = Keyword()
+    energy_100g = Double()
+    energy_kj_100g = Double()
+    energy_kcal_100g = Double()
+    proteins_100g = Double()
+    casein_100g = Double()
+    serum_proteins_100g = Double()
+    nucleotides_100g = Double()
+    carbohydrates_100g = Double()
+    sugars_100g = Double()
+    sucrose_100g = Double()
+    glucose_100g = Double()
+    fructose_100g = Double()
+    lactose_100g = Double()
+    maltose_100g = Double()
+    maltodextrins_100g = Double()
+    starch_100g = Double()
+    polyols_100g = Double()
+    fat_100g = Double()
+    saturated_fat_100g = Double()
+    butyric_acid_100g = Double()
+    caproic_acid_100g = Double()
+    caprylic_acid_100g = Double()
+    capric_acid_100g = Double()
+    lauric_acid_100g = Double()
+    myristic_acid_100g = Double()
+    palmitic_acid_100g = Double()
+    stearic_acid_100g = Double()
+    arachidic_acid_100g = Double()
+    behenic_acid_100g = Double()
+    lignoceric_acid_100g = Double()
+    cerotic_acid_100g = Double()
+    montanic_acid_100g = Double()
+    melissic_acid_100g = Double()
+    monounsaturated_fat_100g = Double()
+    polyunsaturated_fat_100g = Double()
+    omega_3_fat_100g = Double()
+    alpha_linolenic_acid_100g = Double()
+    eicosapentaenoic_acid_100g = Double()
+    docosahexaenoic_acid_100g = Double()
+    omega_6_fat_100g = Double()
+    linoleic_acid_100g = Double()
+    arachidonic_acid_100g = Double()
+    gamma_linolenic_acid_100g = Double()
+    dihomo_gamma_linolenic_acid_100g = Double()
+    omega_9_fat_100g = Double()
+    oleic_acid_100g = Double()
+    elaidic_acid_100g = Double()
+    gondoic_acid_100g = Double()
+    mead_acid_100g = Double()
+    erucic_acid_100g = Double()
+    nervonic_acid_100g = Double()
+    trans_fat_100g = Double()
+    cholesterol_100g = Double()
+    fiber_100g = Double()
+    sodium_100g = Double()
+    # % vol of alcohol
+    alcohol_100g = Double()
+    vitamin_a_100g = Double()
+    vitamin_d_100g = Double()
+    vitamin_e_100g = Double()
+    vitamin_k_100g = Double()
+    vitamin_c_100g = Double()
+    vitamin_b1_100g = Double()
+    vitamin_b2_100g = Double()
+    vitamin_pp_100g = Double()
+    vitamin_b6_100g = Double()
+    vitamin_b9_100g = Double()
+    vitamin_b12_100g = Double()
+    # also known as Vitamin B8
+    biotin_100g = Double()
+    # also known as Vitamin B5
+    pantothenic_acid_100g = Double()
+    silica_100g = Double()
+    bicarbonate_100g = Double()
+    potassium_100g = Double()
+    chloride_100g = Double()
+    calcium_100g = Double()
+    phosphorus_100g = Double()
+    iron_100g = Double()
+    magnesium_100g = Double()
+    zinc_100g = Double()
+    copper_100g = Double()
+    manganese_100g = Double()
+    fluoride_100g = Double()
+    selenium_100g = Double()
+    chromium_100g = Double()
+    molybdenum_100g = Double()
+    iodine_100g = Double()
+    caffeine_100g = Double()
+    taurine_100g = Double()
+    # pH (no unit)
+    ph_100g = Double()
+    # % of fruits, vegetables and nuts (excluding potatoes, yams, manioc)
+    fruits_vegetables_nuts_100g = Double()
+    # carbon footprint (as indicated on the packaging of some products)
+    carbon_footprint_100g = Double()
+    # Nutri-Score - Nutrition score derived from the UK FSA score and adapted for the French market (formula defined by the team of Professor Hercberg)
+    nutrition_score_fr_100g = Double()
+    # nutrition score defined by the UK Food Standards Administration (FSA)
+    nutrition_score_uk_100g = Double()
+    countries_en = Keyword()
+    pnns_groups_1 = Keyword()
+    pnns_groups_2 = Keyword()
+    states = Keyword()
+    states_tags = Text(multi=True)
+    states_en = Keyword()
+    ecoscore_grade = Keyword()
+    image_nutrition_url = Keyword()
+    image_nutrition_small_url = Keyword()
+    origins_en = Keyword()
+    ingredients_tags = Text(multi=True)
+    image_ingredients_url = Keyword()
+    image_ingredients_small_url = Keyword()
+    salt_100g = Double()
+    fruits_vegetables_nuts_estimate_100g = Double()
+    fruits_vegetables_nuts_estimate_from_ingredients_100g = Double()
+    fruits_vegetables_nuts_dried_100g = Double()
+    categories_en = Keyword()
+    nutriscore_score = Keyword()
+    nutriscore_grade = Keyword()
+    food_groups = Keyword()
+    food_groups_tags = Text(multi=True)
+    food_groups_en = Keyword()
+    ecoscore_score = Keyword()
+    main_category_en = Keyword()
+    additives_en = Keyword()
+    nova_group = Keyword()
+    labels_en = Keyword()
+    allergens = Keyword()
+    packaging_en = Keyword()
+    packaging_text = Keyword()
+    serving_quantity = Keyword()
+    carbon_footprint_from_meat_or_fish_100g = Double()
+    energy_from_fat_100g = Double()
+    folates_100g = Double()
+    soluble_fiber_100g = Double()
+    insoluble_fiber_100g = Double()
+    phylloquinone_100g = Double()
+    cocoa_100g = Double()
+    choline_100g = Double()
+    inositol_100g = Double()
+    collagen_meat_protein_ratio_100g = Double()
+    beta_carotene_100g = Double()
+    chlorophyl_100g = Double()
+    glycemic_index_100g = Double()
+    water_hardness_100g = Double()
+    beta_glucan_100g = Double()
+    carnitine_100g = Double()
+    traces_en = Keyword()
+    brand_owner = Keyword()
+    abbreviated_product_name = Keyword()
+    allergens_en = Keyword()
diff --git a/models/request.py b/models/request.py
new file mode 100644
index 00000000..4c4593be
--- /dev/null
+++ b/models/request.py
@@ -0,0 +1,46 @@
+import datetime
+from typing import Optional, List, Set
+from pydantic import BaseModel
+
+from utils import constants
+
+
+class SearchBase(BaseModel):
+    response_fields: Optional[Set[str]]
+    num_results: int = 10
+
+    def get_num_results(self):
+        return min(self.num_results, constants.MAX_RESULTS)
+
+
+class AutocompleteRequest(SearchBase):
+    text: str
+    search_fields: List[str] = constants.AUTOCOMPLETE_FIELDS
+
+
+class StringFilter(BaseModel):
+    field: str
+    value: str
+    # One of eq, ne, like, without
+    operator: str = 'eq'
+
+
+class NumericFilter(BaseModel):
+    field: str
+    value: float
+    # One of eq, ne, lt, gt, without
+    operator: str = 'eq'
+
+
+class DateTimeFilter(BaseModel):
+    field: str
+    value: datetime.datetime
+    # One of eq, ne, lt, gt, without
+    operator: str = 'eq'
+
+
+class SearchRequest(SearchBase):
+    # Works as an intersection/AND query
+    string_filters: List[StringFilter]
+    numeric_filters: List[NumericFilter]
+    date_time_filters: List[DateTimeFilter]
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..590564a6
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,24 @@
+anyio==3.6.1
+certifi==2022.6.15
+charset-normalizer==2.1.0
+click==8.1.3
+elasticsearch==7.17.4
+elasticsearch-dsl==7.4.0
+fastapi==0.79.0
+h11==0.13.0
+httptools==0.4.0
+idna==3.3
+pydantic==1.9.1
+python-dateutil==2.8.2
+python-dotenv==0.20.0
+PyYAML==6.0
+requests==2.28.1
+six==1.16.0
+sniffio==1.2.0
+starlette==0.19.1
+typing_extensions==4.3.0
+urllib3==1.26.11
+uvicorn==0.18.2
+uvloop==0.16.0
+watchfiles==0.16.1
+websockets==10.3
\ No newline at end of file
diff --git a/scripts/data-fields.txt b/scripts/data-fields.txt
new file mode 100644
index 00000000..8bd1f707
--- /dev/null
+++ b/scripts/data-fields.txt
@@ -0,0 +1,179 @@
+This file describes the fields from the CSV export of the products in the Open Food Facts database.
+
+See https://world.openfoodfacts.org/data for more information.
+
+The file encoding is Unicode UTF-8. The character that separates fields is <tab> (tabulation).
+
+Generalities:
+
+- fields that end with _t are dates in the UNIX timestamp format (number of seconds since Jan 1st 1970)
+- fields that end with _datetime are dates in the iso8601 format: yyyy-mm-ddThh:mn:ssZ
+- fields that end with _tags are comma separated lists of tags (e.g. categories_tags is the set of normalized tags computed from the categories field)
+- fields that end with a language 2 letter code (e.g. fr for French) are the set of tags in that language
+- fields that end with _100g correspond to the amount of a nutriment (in g, or kJ for energy) for 100 g or 100 ml of product
+- fields that end with _serving correspond to the amount of a nutriment (in g, or kJ for energy) for 1 serving of the product
+
+List of fields:
+
+# general information:
+
+code : barcode of the product (can be EAN-13 or internal codes for some food stores), for products without a barcode, Open Food Facts assigns a number starting with the 200 reserved prefix
+url : url of the product page on Open Food Facts
+creator : contributor who first added the product
+created_t : date that the product was added (UNIX timestamp format)
+created_datetime : date that the product was added (iso8601 format: yyyy-mm-ddThh:mn:ssZ)
+last_modified_t : date that the product page was last modified
+last_modified_datetime
+product_name : name of the product
+generic_name
+quantity : quantity and unit
+
+# tags:
+
+packaging : shape, material
+packaging_tags
+brands
+brands_tags
+categories
+categories_tags
+categories_fr
+origins : origins of ingredients
+origins_tags
+manufacturing_places : places where manufactured or transformed
+manufacturing_places_tags
+labels
+labels_tags
+labels_fr
+emb_codes
+emb_codes_tags
+first_packaging_code_geo : coordinates corresponding to the first packaging code indicated
+cities
+cities_tags
+purchase_places
+stores
+countries : list of countries where the product is sold
+countries_tags
+countries_fr
+
+# ingredients:
+
+ingredients_text
+traces
+traces_tags
+
+# misc. data:
+
+serving_size : serving size in g
+no_nutriments : indicates if the nutrition facts are indicated on the food label
+additives_n : number of food additives
+additives
+additives_tags
+ingredients_from_palm_oil_n
+ingredients_from_palm_oil
+ingredients_from_palm_oil_tags
+ingredients_that_may_be_from_palm_oil_n
+ingredients_that_may_be_from_palm_oil
+ingredients_that_may_be_from_palm_oil_tags
+nutrition_grade_fr : nutrition grade ('a' to 'e'). see https://fr.openfoodfacts.org/nutriscore
+main_category
+main_category_fr
+image_url
+image_small_url
+
+# nutrition facts:
+# Please see https://wiki.openfoodfacts.org/Nutrients_handling_in_Open_Food_Facts for more information on nutrients
+
+energy_100g
+energy-kj_100g
+energy-kcal_100g
+proteins_100g
+casein_100g
+serum-proteins_100g
+nucleotides_100g
+carbohydrates_100g
+sugars_100g
+sucrose_100g
+glucose_100g
+fructose_100g
+lactose_100g
+maltose_100g
+maltodextrins_100g
+starch_100g
+polyols_100g
+fat_100g
+saturated-fat_100g
+butyric-acid_100g
+caproic-acid_100g
+caprylic-acid_100g
+capric-acid_100g
+lauric-acid_100g
+myristic-acid_100g
+palmitic-acid_100g
+stearic-acid_100g
+arachidic-acid_100g
+behenic-acid_100g
+lignoceric-acid_100g
+cerotic-acid_100g
+montanic-acid_100g
+melissic-acid_100g
+monounsaturated-fat_100g
+polyunsaturated-fat_100g
+omega-3-fat_100g
+alpha-linolenic-acid_100g
+eicosapentaenoic-acid_100g
+docosahexaenoic-acid_100g
+omega-6-fat_100g
+linoleic-acid_100g
+arachidonic-acid_100g
+gamma-linolenic-acid_100g
+dihomo-gamma-linolenic-acid_100g
+omega-9-fat_100g
+oleic-acid_100g
+elaidic-acid_100g
+gondoic-acid_100g
+mead-acid_100g
+erucic-acid_100g
+nervonic-acid_100g
+trans-fat_100g
+cholesterol_100g
+fiber_100g
+sodium_100g
+alcohol_100g : % vol of alcohol
+vitamin-a_100g
+vitamin-d_100g
+vitamin-e_100g
+vitamin-k_100g
+vitamin-c_100g
+vitamin-b1_100g
+vitamin-b2_100g
+vitamin-pp_100g
+vitamin-b6_100g
+vitamin-b9_100g
+vitamin-b12_100g
+biotin_100g : also known as Vitamin B8
+pantothenic-acid_100g : also known as Vitamin B5
+silica_100g
+bicarbonate_100g
+potassium_100g
+chloride_100g
+calcium_100g
+phosphorus_100g
+iron_100g
+magnesium_100g
+zinc_100g
+copper_100g
+manganese_100g
+fluoride_100g
+selenium_100g
+chromium_100g
+molybdenum_100g
+iodine_100g
+caffeine_100g
+taurine_100g
+ph_100g : pH (no unit)
+fruits-vegetables-nuts_100g : % of fruits, vegetables and nuts (excluding potatoes, yams, manioc)
+
+carbon-footprint_100g : carbon footprint (as indicated on the packaging of some products)
+
+nutrition-score-fr_100g : Nutri-Score - Nutrition score derived from the UK FSA score and adapted for the French market (formula defined by the team of Professor Hercberg)
+nutrition-score-uk_100g : nutrition score defined by the UK Food Standards Administration (FSA)
\ No newline at end of file
diff --git a/scripts/es_query.py b/scripts/es_query.py
new file mode 100644
index 00000000..9b535b4e
--- /dev/null
+++ b/scripts/es_query.py
@@ -0,0 +1,24 @@
+"""
+Script that allows manually querying ES
+"""
+import time
+
+from models.product import Product
+from utils import connection
+
+
+def manual_query():
+    connection.get_connection()
+
+    while True:
+        search_term = input("Please enter search term:\n")
+        start_time = time.perf_counter()
+        results = Product.search().query('match', product_name__autocomplete=search_term).execute()
+        for result in results[:10]:
+            print(result.meta.score, result.product_name)
+        end_time = time.perf_counter()
+        print("Time: {} seconds".format(end_time - start_time))
+
+
+if __name__ == "__main__":
+    manual_query()
\ No newline at end of file
diff --git a/scripts/generate_product_from_data_fields.py b/scripts/generate_product_from_data_fields.py
new file mode 100644
index 00000000..bd0ccdf0
--- /dev/null
+++ b/scripts/generate_product_from_data_fields.py
@@ -0,0 +1,76 @@
+"""
+This script takes the data-fields.txt and generates the updated product fields.
+Note: if field names are changed, this will have considerable implications for the index.
+"""
+from utils import constants
+
+
+def get_type_for_field(field):
+    """
+    The docs state:
+    - fields that end with _t are dates in the UNIX timestamp format (number of seconds since Jan 1st 1970)
+    - fields that end with _datetime are dates in the iso8601 format: yyyy-mm-ddThh:mn:ssZ
+    - fields that end with _tags are comma separated lists of tags (e.g. categories_tags is the set of normalized tags computed from the categories field)
+    - fields that end with a language 2 letter code (e.g. fr for French) are the set of tags in that language
+    - fields that end with _100g correspond to the amount of a nutriment (in g, or kJ for energy) for 100 g or 100 ml of product
+    - fields that end with _serving correspond to the amount of a nutriment (in g, or kJ for energy) for 1 serving of the product
+    """
+
+    suffix_to_type = {
+        't': 'Integer()',
+        'datetime': 'Date()',
+        'tags': 'Text(multi=True)',
+        '100g': 'Double()',
+        'serving': 'Double()',
+    }
+
+    suffix = field.split('_')[-1]
+
+    field_type = suffix_to_type.get(suffix)
+    if field_type:
+        return field_type
+
+    # Otherwise, default to Keyword
+    return 'Keyword()'
+
+
+def generate_product_from_data_fields():
+    with open('data-fields.txt', 'r') as f:
+        lines = f.readlines()
+
+    # Add undocumented fields
+    lines += constants.UNDOCUMENTED_FIELDS
+
+    for line in lines:
+        words = line.split()
+        # Lines with fields should follow the pattern of <field_name> or <field_name> : <description>
+        if len(words) != 1 and ':' not in words:
+            continue
+
+        # Remove any lines with a : but only one word (as these are headings)
+        if len(words) == 1 and ':' in words[0]:
+            continue
+
+        field_name = words[0]
+        description = ''
+        if len(words) > 2:
+            description = ' '.join(words[2:])
+
+        if description:
+            print("# {}".format(description))
+
+        # Some fields have dashes, let's replace them
+        field_name = field_name.replace('-', '_')
+
+        # Autocomplete cases
+        if field_name in constants.AUTOCOMPLETE_FIELDS:
+            # Do text with snowball (for direct searches), and autocomplete too
+            print(field_name + " = Text(analyzer='snowball', fields={'autocomplete': Text(analyzer=autocomplete), "
+                               "'raw': Keyword()})")
+        else:
+            field_type = get_type_for_field(field_name)
+            print("{} = {}".format(field_name, field_type))
+
+
+if __name__ == "__main__":
+    generate_product_from_data_fields()
diff --git a/scripts/http_query.py b/scripts/http_query.py
new file mode 100644
index 00000000..c252eedb
--- /dev/null
+++ b/scripts/http_query.py
@@ -0,0 +1,33 @@
+"""
+Script that allows manually querying the local search service
+"""
+import json
+import time
+import requests
+
+from models.product import Product
+from utils import connection
+
+
+def manual_query():
+
+    connection.get_connection()
+
+    while True:
+        search_term = input("Please enter search term:\n")
+        start_time = time.perf_counter()
+
+        payload = {
+            'text': search_term,
+            'num_results': 10,
+            'response_fields': ['product_name', 'pnns_groups_1'],
+        }
+        response = requests.post("http://127.0.0.1:8000/autocomplete", json=payload)
+        print(json.dumps(response.json(), indent=4, sort_keys=True))
+        print("Number of results: {}".format(len(response.json())))
+        end_time = time.perf_counter()
+        print("Time: {} seconds".format(end_time - start_time))
+
+
+if __name__ == "__main__":
+    manual_query()
\ No newline at end of file
diff --git a/scripts/perform_import.py b/scripts/perform_import.py
new file mode 100644
index 00000000..59e22889
--- /dev/null
+++ b/scripts/perform_import.py
@@ -0,0 +1,91 @@
+"""
+Performs an import from a CSV file. Note that this builds a new index and then repoints the alias to it, replacing the previous index
+
+Pass in the path of the file with the filename argument
+
+Example:
+python scripts/perform_import.py --filename=X
+"""
+import csv
+import argparse
+import time
+from datetime import datetime
+
+from elasticsearch.helpers import bulk
+
+from models.product import Product
+from utils import connection, constants
+
+
+def gen_documents(filename, next_index):
+    valid_field_names = set(field_name for field_name in Product._doc_type.mapping.properties.to_dict()['properties'].keys())
+    with open(filename) as f:
+        input_file = csv.DictReader(f, delimiter='\t')
+
+        for i, row in enumerate(input_file):
+            # Use underscores for consistency
+            row = {k.replace('-', '_'): v for k, v in row.items()}
+
+            # Some fields have a leading dash (now underscore), remove them
+            row = {k: v for k, v in row.items() if not k.startswith('_')}
+
+            # For the first row, check that we have every column name in our index
+            if i == 0:
+                column_names = row.keys()
+                missing_column_names = []
+                for column_name in column_names:
+                    if column_name not in valid_field_names and column_name:
+                        missing_column_names.append(column_name)
+
+                if missing_column_names:
+                    print("Missing: {}".format(missing_column_names))
+                    exit(-1)
+
+            # Remove all empty values, we don't want to waste space in the index
+            row = {k: v for k, v in row.items() if v != ''}
+
+            # Split tags
+            for k in row.keys():
+                if k.endswith('_tags'):
+                    row[k] = row[k].split(',')
+
+            product = Product(**row).to_dict(True)
+            # Override the index
+            product['_index'] = next_index
+            yield product
+
+            if i % 100000 == 0 and i:
+                # Roughly 2.5M lines as of July 2022
+                print("Processed: {} lines".format(i))
+
+
+def update_alias(es, next_index):
+    # repoint the alias to point to the newly created index
+    es.indices.update_aliases(
+        body={
+            "actions": [
+                {"remove": {"alias": constants.INDEX_ALIAS, "index": constants.INDEX_ALIAS_PATTERN}},
+                {"add": {"alias": constants.INDEX_ALIAS, "index": next_index}},
+            ]
+        }
+    )
+
+
+def perform_import(filename):
+    es = connection.get_connection()
+    next_index = constants.INDEX_ALIAS_PATTERN.replace("*", datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f"))
+
+    Product.init(index=next_index)
+    bulk(es, gen_documents(filename, next_index))
+    update_alias(es, next_index)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser("perform_import")
+    parser.add_argument("--filename", help="Filename where CSV file is located", type=str)
+    args = parser.parse_args()
+
+    start_time = time.perf_counter()
+    perform_import(args.filename)
+    end_time = time.perf_counter()
+    print("Import time: {} seconds".format(end_time - start_time))
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/utils/analyzers.py b/utils/analyzers.py
new file mode 100644
index 00000000..36958879
--- /dev/null
+++ b/utils/analyzers.py
@@ -0,0 +1,8 @@
+from elasticsearch_dsl import analyzer
+from elasticsearch_dsl import tokenizer
+
+autocomplete = analyzer(
+    'autocomplete',
+    tokenizer=tokenizer('bigram', 'edge_ngram', min_gram=2, max_gram=25, token_chars=['letter', 'digit', 'punctuation']),
+    filter=['lowercase', 'asciifolding']
+)
\ No newline at end of file
diff --git a/utils/connection.py b/utils/connection.py
new file mode 100644
index 00000000..7c8659b0
--- /dev/null
+++ b/utils/connection.py
@@ -0,0 +1,5 @@
+from elasticsearch_dsl.connections import connections
+
+
+def get_connection():
+    return connections.create_connection(hosts=['localhost:9200'])
\ No newline at end of file
diff --git a/utils/constants.py b/utils/constants.py
new file mode 100644
index 00000000..a84db698
--- /dev/null
+++ b/utils/constants.py
@@ -0,0 +1,59 @@
+AUTOCOMPLETE_FIELDS = ['product_name', 'brands', 'categories']
+INDEX_ALIAS = 'openfoodfacts'
+INDEX_ALIAS_PATTERN = INDEX_ALIAS + "-*"
+MAX_RESULTS = 100
+
+UNDOCUMENTED_FIELDS = [
+    'countries_en',
+    'pnns_groups_1',
+    'pnns_groups_2',
+    'states',
+    'states_tags',
+    'states_en',
+    'ecoscore_grade',
+    'image_nutrition_url',
+    'image_nutrition_small_url',
+    'origins_en',
+    'ingredients_tags',
+    'image_ingredients_url',
+    'image_ingredients_small_url',
+    'salt_100g',
+    'fruits_vegetables_nuts_estimate_100g',
+    'fruits_vegetables_nuts_estimate_from_ingredients_100g',
+    'fruits_vegetables_nuts_dried_100g',
+    'categories_en',
+    'nutriscore_score',
+    'nutriscore_grade',
+    'food_groups',
+    'food_groups_tags',
+    'food_groups_en',
+    'ecoscore_score',
+    'main_category_en',
+    'additives_en',
+    'nova_group',
+    'labels_en',
+    'allergens',
+    'packaging_en',
+    'packaging_text',
+    'serving_quantity',
+    'carbon_footprint_from_meat_or_fish_100g',
+    'energy_from_fat_100g',
+    'folates_100g',
+    'soluble_fiber_100g',
+    'insoluble_fiber_100g',
+    'phylloquinone_100g',
+    'cocoa_100g',
+    'choline_100g',
+    'inositol_100g',
+    'collagen_meat_protein_ratio_100g',
+    'beta_carotene_100g',
+    'chlorophyl_100g',
+    'glycemic_index_100g',
+    'water_hardness_100g',
+    'beta_glucan_100g',
+    'carnitine_100g',
+    'traces_en',
+    'brand_owner',
+    'abbreviated_product_name',
+    'allergens_en',
+]
\ No newline at end of file
diff --git a/utils/response.py b/utils/response.py
new file mode 100644
index 00000000..5667d99e
--- /dev/null
+++ b/utils/response.py
@@ -0,0 +1,28 @@
+from models.product import Product
+from models.request import SearchBase
+
+
+def create_response(es_results, request: SearchBase):
+    resp = [convert_es_result(r, request) for r in es_results]
+    return resp
+
+
+def convert_es_result(es_result, request: SearchBase):
+    if not es_result:
+        return None
+
+    # Add missing fields to maintain backwards compatibility
+    field_names = list(Product._doc_type.mapping.properties.to_dict()['properties'].keys())
+    result_dict = {field_name: [] if field_name.endswith('_tags') else '' for field_name in field_names}
+    result_dict.update(es_result.to_dict())
+
+    # Trim fields as needed
+    if request.response_fields:
+        trimmed_result_dict = {}
+        for response_field in request.response_fields:
+            if response_field in result_dict:
+                trimmed_result_dict[response_field] = result_dict[response_field]
+
+        result_dict = trimmed_result_dict
+
+    return result_dict

From 56755341fd40000aa0ff42ea728dfcc9aac7fdde Mon Sep 17 00:00:00 2001
From: Simon Jenkins
Date: Tue, 2 Aug 2022 11:43:53 -0700
Subject: [PATCH 2/2] Docker for search service, rearchitecture

---
 .env                                          | 20 ++++++++++
 .gitignore                                    |  1 -
 Dockerfile                                    |  7 ++++
 README.md                                     | 37 ++++++++++++++-----
 {models => app}/__init__.py                   |  0
 api.py => app/api.py                          |  6 +--
 {utils => app/models}/__init__.py             |  0
 {models => app/models}/product.py             |  4 +-
 {models => app/models}/request.py             |  2 +-
 {scripts => app/scripts}/data-fields.txt      |  0
 {scripts => app/scripts}/es_query.py          |  4 +-
 .../generate_product_from_data_fields.py      |  2 +-
 {scripts => app/scripts}/http_query.py        |  3 +-
 {scripts => app/scripts}/perform_import.py    |  4 +-
 app/utils/__init__.py                         |  0
 {utils => app/utils}/analyzers.py             |  2 +-
 app/utils/connection.py                       |  6 +++
 {utils => app/utils}/constants.py             |  0
 {utils => app/utils}/response.py              |  4 +-
 docker-compose.yml                            |  9 ++++-
 utils/connection.py                           |  5 ---
 21 files changed, 83 insertions(+), 33 deletions(-)
 create mode 100644 .env
 create mode 100644 Dockerfile
 rename {models => app}/__init__.py (100%)
 rename api.py => app/api.py (90%)
 rename {utils => app/models}/__init__.py (100%)
 rename {models => app/models}/product.py (99%)
 rename {models => app/models}/request.py (96%)
 rename {scripts => app/scripts}/data-fields.txt (100%)
 rename {scripts => app/scripts}/es_query.py (88%)
 rename {scripts => app/scripts}/generate_product_from_data_fields.py (98%)
 rename {scripts => app/scripts}/http_query.py (92%)
 rename {scripts => app/scripts}/perform_import.py (97%)
 create mode 100644 app/utils/__init__.py
 rename {utils => app/utils}/analyzers.py (99%)
 create mode 100644 app/utils/connection.py
 rename {utils => app/utils}/constants.py (100%)
 rename {utils => app/utils}/response.py (91%)
 delete mode 100644 utils/connection.py

diff --git a/.env b/.env
new file mode 100644
index 00000000..cdc5150b
--- /dev/null
+++ b/.env
@@ -0,0 +1,20 @@
+# Password for the 'elastic' user (at least 6 characters)
+# This needs to be set in the environment variables
+# ELASTIC_PASSWORD=
+
+# Version of Elastic products
+STACK_VERSION=8.3.3
+
+# Set the cluster name
+CLUSTER_NAME=docker-cluster
+
+# Set to 'basic' or 'trial' to automatically start the 30-day trial
+LICENSE=basic
+
+# Port to expose Elasticsearch HTTP API to the host
+ES_PORT=9200
+#ES_PORT=127.0.0.1:9200
+
+# Increase or decrease based on the available host memory (in bytes)
+# 1GB works well, 2GB and above leads to lower latency
+MEM_LIMIT=2147483648
diff --git a/.gitignore b/.gitignore
index 58669e3d..a2a24871 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,7 +102,6 @@ celerybeat.pid
 *.sage.py
 
 # Environments
-.env
 .venv
 env/
 venv/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..83dc4c72
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,7 @@
+# Instructions from https://fastapi.tiangolo.com/deployment/docker/
+FROM python:3.9
+WORKDIR /code
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+COPY ./app /code/app
+CMD ["uvicorn", "app.api:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/README.md b/README.md
index 799502cf..563edc87 100644
--- a/README.md
+++ b/README.md
@@ -9,30 +9,47 @@ The main file is `api.py`, and the Product schema is in `models/product.py`.
 The `scripts/` directory contains various scripts for manual validation, constructing the product schema, importing, etc.
 
 ### Running locally
+Firstly, make sure your environment is configured:
+```commandline
+export ELASTIC_PASSWORD=PASSWORD_HERE
+```
+
+Then start docker:
+```console
+docker-compose up -d
+```
+
 Docker spins up:
 - Two elasticsearch nodes
 - [Elasticvue](https://elasticvue.com/)
+- The search service on port 8000
 
 You will then need to import from CSV (see instructions below).
 
-Make sure your environment is configured:
-```commandline
-export ELASTIC_PASSWORD=PASSWORD_HERE
-```
-
-
-### Helpful commands:
+### Development
+For development, you have two options for running the service:
+1. Docker
+2. Locally
 
-To start docker:
+To develop on docker, make the changes you need, then build the image and compose by running:
 ```console
+docker build -t off_search_image .
 docker-compose up -d
 ```
 
-To start server:
+However, this tends to be slower than developing locally.
+
+To develop locally, create a venv, install dependencies, then run the service:
 ```console
-uvicorn api:app --reload
+virtualenv venv
+source venv/bin/activate
+pip install -r requirements.txt
+uvicorn app.api:app --reload --port=8001
 ```
+Note that it's important to use port 8001, as port 8000 will be used by the docker version of the search service.
+
+### Helpful commands:
 
 To import data from the [CSV export](https://world.openfoodfacts.org/data):
 ```console
 python scripts/perform_import.py --filename=/path/to/file.csv
\ No newline at end of file
diff --git a/models/__init__.py b/app/__init__.py
similarity index 100%
rename from models/__init__.py
rename to app/__init__.py
diff --git a/api.py b/app/api.py
similarity index 90%
rename from api.py
rename to app/api.py
index 4ad2cb8b..e61ba988 100644
--- a/api.py
+++ b/app/api.py
@@ -1,9 +1,9 @@
 from elasticsearch_dsl import Q
 from fastapi import FastAPI, HTTPException
 
-from models.product import Product
-from models.request import AutocompleteRequest, SearchRequest
-from utils import connection, constants, response
+from app.models.product import Product
+from app.models.request import AutocompleteRequest, SearchRequest
+from app.utils import connection, constants, response
 
 app = FastAPI()
 connection.get_connection()
diff --git a/utils/__init__.py b/app/models/__init__.py
similarity index 100%
rename from utils/__init__.py
rename to app/models/__init__.py
diff --git a/models/product.py b/app/models/product.py
similarity index 99%
rename from models/product.py
rename to app/models/product.py
index 04087639..a0290abf 100644
--- a/models/product.py
+++ b/app/models/product.py
@@ -1,7 +1,7 @@
 from elasticsearch_dsl import Document, Date, Double, Keyword, Text, Integer
 
-from utils import constants
-from utils.analyzers import autocomplete
+from app.utils import constants
+from app.utils.analyzers import autocomplete
 
 
 class Product(Document):
diff --git a/models/request.py b/app/models/request.py
similarity index 96%
rename from models/request.py
rename to app/models/request.py
index 4c4593be..1948eba0 100644
--- a/models/request.py
+++ b/app/models/request.py
@@ -2,7 +2,7 @@
 from typing import Optional, List, Set
 from pydantic import BaseModel
 
-from utils import constants
+from app.utils import constants
 
 
 class SearchBase(BaseModel):
diff --git a/scripts/data-fields.txt b/app/scripts/data-fields.txt
similarity index 100%
rename from scripts/data-fields.txt
rename to app/scripts/data-fields.txt
diff --git a/scripts/es_query.py b/app/scripts/es_query.py
similarity index 88%
rename from scripts/es_query.py
rename to app/scripts/es_query.py
index 9b535b4e..bb7256b5 100644
--- a/scripts/es_query.py
+++ b/app/scripts/es_query.py
@@ -3,8 +3,8 @@
 """
 import time
 
-from models.product import Product
-from utils import connection
+from app.models.product import Product
+from app.utils import connection
 
 
 def manual_query():
diff --git a/scripts/generate_product_from_data_fields.py b/app/scripts/generate_product_from_data_fields.py
similarity index 98%
rename from scripts/generate_product_from_data_fields.py
rename to app/scripts/generate_product_from_data_fields.py
index bd0ccdf0..0c4a8f2d 100644
--- a/scripts/generate_product_from_data_fields.py
+++ b/app/scripts/generate_product_from_data_fields.py
@@ -2,7 +2,7 @@
 This script takes the data-fields.txt and generates the updated product fields.
 Note: if field names are changed, this will have considerable implications for the index.
""" -from utils import constants +from app.utils import constants def get_type_for_field(field): diff --git a/scripts/http_query.py b/app/scripts/http_query.py similarity index 92% rename from scripts/http_query.py rename to app/scripts/http_query.py index c252eedb..ee94ea00 100644 --- a/scripts/http_query.py +++ b/app/scripts/http_query.py @@ -5,8 +5,7 @@ import time import requests -from models.product import Product -from utils import connection +from app.utils import connection def manual_query(): diff --git a/scripts/perform_import.py b/app/scripts/perform_import.py similarity index 97% rename from scripts/perform_import.py rename to app/scripts/perform_import.py index 59e22889..161a982c 100644 --- a/scripts/perform_import.py +++ b/app/scripts/perform_import.py @@ -13,8 +13,8 @@ from elasticsearch.helpers import bulk -from models.product import Product -from utils import connection, constants +from app.models.product import Product +from app.utils import connection, constants def gen_documents(filename, next_index): diff --git a/app/utils/__init__.py b/app/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/utils/analyzers.py b/app/utils/analyzers.py similarity index 99% rename from utils/analyzers.py rename to app/utils/analyzers.py index 36958879..aad86965 100644 --- a/utils/analyzers.py +++ b/app/utils/analyzers.py @@ -5,4 +5,4 @@ 'autocomplete', tokenizer=tokenizer('bigram', 'edge_ngram', min_gram=2, max_gram=25, token_chars=['letter', 'digit', 'punctuation']), filter=['lowercase', 'asciifolding'] -) \ No newline at end of file +) diff --git a/app/utils/connection.py b/app/utils/connection.py new file mode 100644 index 00000000..27d296e4 --- /dev/null +++ b/app/utils/connection.py @@ -0,0 +1,6 @@ +import os +from elasticsearch_dsl.connections import connections + + +def get_connection(): + return connections.create_connection(hosts=[os.getenv('ELASTICSEARCH_URL', '127.0.0.1:9200')]) \ No newline at end of file diff --git a/utils/constants.py b/app/utils/constants.py similarity index 100% rename from utils/constants.py rename to app/utils/constants.py diff --git a/utils/response.py b/app/utils/response.py similarity index 91% rename from utils/response.py rename to app/utils/response.py index 5667d99e..1a6f66fa 100644 --- a/utils/response.py +++ b/app/utils/response.py @@ -1,5 +1,5 @@ -from models.product import Product -from models.request import SearchBase +from app.models.product import Product +from app.models.request import SearchBase def create_response(es_results, request: SearchBase): diff --git a/docker-compose.yml b/docker-compose.yml index b9aabc16..44e7ecc0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -123,7 +123,6 @@ services: timeout: 10s retries: 120 - # elasticsearch browser elasticvue: image: cars10/elasticvue @@ -133,6 +132,14 @@ services: links: - es01 + searchservice: + image: off_search_image + container_name: searchservice + environment: + - ELASTICSEARCH_URL=host.docker.internal:9200 + ports: + - '8000:8000' + volumes: certs: driver: local diff --git a/utils/connection.py b/utils/connection.py deleted file mode 100644 index 7c8659b0..00000000 --- a/utils/connection.py +++ /dev/null @@ -1,5 +0,0 @@ -from elasticsearch_dsl.connections import connections - - -def get_connection(): - return connections.create_connection(hosts=['localhost:9200']) \ No newline at end of file