Skip to content

Commit

Permalink
Merge pull request #1 from openfoodfacts/master
Browse files Browse the repository at this point in the history
Initial service commit
  • Loading branch information
simonj2 authored Aug 4, 2022
2 parents 3fe1d5f + 5675534 commit b82776d
Show file tree
Hide file tree
Showing 21 changed files with 1,094 additions and 1 deletion.
20 changes: 20 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Password for the 'elastic' user (at least 6 characters)
# This needs to be set in the environment variables
# ELASTIC_PASSWORD=

# Version of Elastic products
STACK_VERSION=8.3.3

# Set the cluster name
CLUSTER_NAME=docker-cluster

# Set to 'basic' or 'trial' to automatically start the 30-day trial
LICENSE=basic

# Port to expose Elasticsearch HTTP API to the host
ES_PORT=9200
#ES_PORT=127.0.0.1:9200

# Increase or decrease based on the available host memory (in bytes)
# 1GB works well, 2GB and above leads to lower latency
MEM_LIMIT=2147483648
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ celerybeat.pid
*.sage.py

# Environments
.env
.venv
env/
venv/
Expand All @@ -127,3 +126,7 @@ dmypy.json

# Pyre type checker
.pyre/

# Pycharm
.idea/
.DS_Store
7 changes: 7 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Instructions from https://fastapi.tiangolo.com/deployment/docker/
FROM python:3.9
WORKDIR /code
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
COPY ./app /code/app
CMD ["uvicorn", "app.api:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"]
53 changes: 53 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,55 @@
# openfoodfacts-search
Open Food Facts Search API V3 using ElasticSearch - https://wiki.openfoodfacts.org/Search_API_V3

This API is currently in development and is not serving any production traffic. The [Work Plan](https://wiki.openfoodfacts.org/Search_API_V3#Work_Plan) will be updated as development continues.

### Organization
The main file is `app/api.py`, and the Product schema is in `app/models/product.py`.

The `scripts/` directory contains various scripts for manual validation, constructing the product schema, importing, etc.

### Running locally
Firstly, make sure your environment is configured:
```commandline
export ELASTIC_PASSWORD=PASSWORD_HERE
```

Then start docker:
```console
docker-compose up -d
```

Docker spins up:
- Two elasticsearch nodes
- [Elasticvue](https://elasticvue.com/)
- The search service on port 8000

You will then need to import from CSV (see instructions below).

### Development
For development, you have two options for running the service:
1. Docker
2. Locally

To develop on Docker, make the changes you need, then rebuild the image and restart the containers by running:
```console
docker build -t off_search_image .
docker-compose up -d
```

However, this tends to be slower than developing locally.

To develop locally, create a venv, install dependencies, then run the service:
```console
virtualenv venv
source venv/bin/activate
pip install -r requirements.txt
uvicorn app.api:app --reload --port=8001
```
Note that it's important to use port 8001, as port 8000 will be used by the docker version of the search service.


### Helpful commands:
To import data from the [CSV export](https://world.openfoodfacts.org/data):
```console
python scripts/perform_import.py --filename=/path/to/file.csv
```
Empty file added app/__init__.py
Empty file.
45 changes: 45 additions & 0 deletions app/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from elasticsearch_dsl import Q
from fastapi import FastAPI, HTTPException

from app.models.product import Product
from app.models.request import AutocompleteRequest, SearchRequest
from app.utils import connection, constants, response

# FastAPI application object; the route decorators below register their endpoints on it.
app = FastAPI()
# Runs once at import time, before any request is handled.
# NOTE(review): presumably this sets up the Elasticsearch connection that Product.search() relies on —
# confirm in app/utils/connection.py.
connection.get_connection()


# TODO: Remove this commented out code, so that it's not confusing about where the current GET API is served
# (retaining temporarily as a proof of concept)
# @app.get("/{barcode}")
# def get_product(barcode: str):
# results = Product.search().query("match", code=barcode).execute()
# results_dict = [r.to_dict() for r in results]
#
# if not results_dict:
# raise HTTPException(status_code=404, detail="Barcode not found")
#
# product = results_dict[0]
# return product

@app.post("/autocomplete")
def autocomplete(request: AutocompleteRequest):
    """
    Handle POST /autocomplete.

    When the request names no search fields, every field in constants.AUTOCOMPLETE_FIELDS is used (the
    request object is updated in place). Each requested field must be one of constants.AUTOCOMPLETE_FIELDS;
    otherwise the request is rejected with HTTP 400. One 'match' query per field is built against
    request.text and the queries are combined as the 'should' clauses of a bool query, limited to
    request.get_num_results() hits.

    Returns the value produced by response.create_response(results, request).
    Raises HTTPException (status 400) for the first search field that is not allowed.
    """
    # TODO: This function needs unit testing
    allowed_fields = constants.AUTOCOMPLETE_FIELDS
    if not request.search_fields:
        # Nothing requested: fall back to every autocomplete-capable field.
        request.search_fields = allowed_fields

    # Reject the request as soon as an unsupported field is encountered.
    for candidate in request.search_fields:
        if candidate in allowed_fields:
            continue
        raise HTTPException(status_code=400, detail="Invalid field: {}".format(candidate))

    # One match clause per field; a hit on any of them is enough (bool/should).
    should_clauses = [Q('match', **{name: request.text}) for name in request.search_fields]

    product_search = Product.search().query('bool', should=should_clauses)
    product_search = product_search.extra(size=request.get_num_results())
    results = product_search.execute()
    return response.create_response(results, request)


@app.post("/search")
def search(request: SearchRequest):
    """Handle POST /search. Placeholder only: always raises NotImplementedError."""
    raise NotImplementedError
Empty file added app/models/__init__.py
Empty file.
241 changes: 241 additions & 0 deletions app/models/product.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
from elasticsearch_dsl import Document, Date, Double, Keyword, Text, Integer

from app.utils import constants
from app.utils.analyzers import autocomplete


class Product(Document):
    """
    Elasticsearch document mapping for an Open Food Facts product.

    This should mirror the fields here:
    https://github.com/openfoodfacts/openfoodfacts-server/blob/main/html/data/data-fields.txt
    Use scripts/generate_product_from_data_fields.py to regenerate from data-fields.txt, but be careful
    to preserve manual adjustments.

    Field conventions visible in this class:
    - product_name, brands and categories are full-text fields analyzed with 'snowball'; each has an
      'autocomplete' sub-field (using the analyzer imported from app.utils.analyzers) and a 'raw'
      keyword sub-field.
    - *_tags fields are declared as Text(multi=True), i.e. multi-valued text fields.
    - *_100g fields are declared as Double.
    - Most remaining fields are declared as Keyword.
    """

    class Index:
        # The index name comes from constants.INDEX_ALIAS.
        name = constants.INDEX_ALIAS
        settings = {
            "number_of_shards": 4,
        }

    # barcode of the product (can be EAN-13 or internal codes for some food stores), for products without a barcode, Open Food Facts assigns a number starting with the 200 reserved prefix
    code = Keyword()
    # url of the product page on Open Food Facts
    url = Keyword()
    # contributor who first added the product
    creator = Keyword()
    # date that the product was added (UNIX timestamp format)
    # NOTE(review): Integer maps to a 32-bit Elasticsearch integer, which UNIX timestamps outgrow in 2038 —
    # consider whether Long is needed here and for last_modified_t.
    created_t = Integer()
    # date that the product was added (iso8601 format: yyyy-mm-ddThh:mn:ssZ)
    created_datetime = Date()
    # date that the product page was last modified
    last_modified_t = Integer()
    last_modified_datetime = Date()
    # name of the product
    product_name = Text(analyzer='snowball', fields={'autocomplete': Text(analyzer=autocomplete), 'raw': Keyword()})
    generic_name = Keyword()
    # quantity and unit
    quantity = Keyword()
    # shape, material
    packaging = Keyword()
    packaging_tags = Text(multi=True)
    brands = Text(analyzer='snowball', fields={'autocomplete': Text(analyzer=autocomplete), 'raw': Keyword()})
    brands_tags = Text(multi=True)
    categories = Text(analyzer='snowball', fields={'autocomplete': Text(analyzer=autocomplete), 'raw': Keyword()})
    categories_tags = Text(multi=True)
    categories_fr = Keyword()
    # origins of ingredients
    origins = Keyword()
    origins_tags = Text(multi=True)
    # places where manufactured or transformed
    manufacturing_places = Keyword()
    manufacturing_places_tags = Text(multi=True)
    labels = Keyword()
    labels_tags = Text(multi=True)
    labels_fr = Keyword()
    emb_codes = Keyword()
    emb_codes_tags = Text(multi=True)
    # coordinates corresponding to the first packaging code indicated
    first_packaging_code_geo = Keyword()
    cities = Keyword()
    cities_tags = Text(multi=True)
    purchase_places = Keyword()
    stores = Keyword()
    # list of countries where the product is sold
    countries = Keyword()
    countries_tags = Text(multi=True)
    countries_fr = Keyword()
    ingredients_text = Keyword()
    traces = Keyword()
    traces_tags = Text(multi=True)
    # serving size in g
    serving_size = Keyword()
    # indicates if the nutrition facts are indicated on the food label
    no_nutriments = Keyword()
    # number of food additives
    # NOTE(review): declared as Keyword although the description is a count — confirm this is intended.
    additives_n = Keyword()
    additives = Keyword()
    additives_tags = Text(multi=True)
    ingredients_from_palm_oil_n = Keyword()
    ingredients_from_palm_oil = Keyword()
    ingredients_from_palm_oil_tags = Text(multi=True)
    ingredients_that_may_be_from_palm_oil_n = Keyword()
    ingredients_that_may_be_from_palm_oil = Keyword()
    ingredients_that_may_be_from_palm_oil_tags = Text(multi=True)
    # nutrition grade ('a' to 'e'). see https://fr.openfoodfacts.org/nutriscore
    nutrition_grade_fr = Keyword()
    main_category = Keyword()
    main_category_fr = Keyword()
    image_url = Keyword()
    image_small_url = Keyword()
    energy_100g = Double()
    energy_kj_100g = Double()
    energy_kcal_100g = Double()
    proteins_100g = Double()
    casein_100g = Double()
    serum_proteins_100g = Double()
    nucleotides_100g = Double()
    carbohydrates_100g = Double()
    sugars_100g = Double()
    sucrose_100g = Double()
    glucose_100g = Double()
    fructose_100g = Double()
    lactose_100g = Double()
    maltose_100g = Double()
    maltodextrins_100g = Double()
    starch_100g = Double()
    polyols_100g = Double()
    fat_100g = Double()
    saturated_fat_100g = Double()
    butyric_acid_100g = Double()
    caproic_acid_100g = Double()
    caprylic_acid_100g = Double()
    capric_acid_100g = Double()
    lauric_acid_100g = Double()
    myristic_acid_100g = Double()
    palmitic_acid_100g = Double()
    stearic_acid_100g = Double()
    arachidic_acid_100g = Double()
    behenic_acid_100g = Double()
    lignoceric_acid_100g = Double()
    cerotic_acid_100g = Double()
    montanic_acid_100g = Double()
    melissic_acid_100g = Double()
    monounsaturated_fat_100g = Double()
    polyunsaturated_fat_100g = Double()
    omega_3_fat_100g = Double()
    alpha_linolenic_acid_100g = Double()
    eicosapentaenoic_acid_100g = Double()
    docosahexaenoic_acid_100g = Double()
    omega_6_fat_100g = Double()
    linoleic_acid_100g = Double()
    arachidonic_acid_100g = Double()
    gamma_linolenic_acid_100g = Double()
    dihomo_gamma_linolenic_acid_100g = Double()
    omega_9_fat_100g = Double()
    oleic_acid_100g = Double()
    elaidic_acid_100g = Double()
    gondoic_acid_100g = Double()
    mead_acid_100g = Double()
    erucic_acid_100g = Double()
    nervonic_acid_100g = Double()
    trans_fat_100g = Double()
    cholesterol_100g = Double()
    fiber_100g = Double()
    sodium_100g = Double()
    # % vol of alcohol
    alcohol_100g = Double()
    vitamin_a_100g = Double()
    vitamin_d_100g = Double()
    vitamin_e_100g = Double()
    vitamin_k_100g = Double()
    vitamin_c_100g = Double()
    vitamin_b1_100g = Double()
    vitamin_b2_100g = Double()
    vitamin_pp_100g = Double()
    vitamin_b6_100g = Double()
    vitamin_b9_100g = Double()
    vitamin_b12_100g = Double()
    # also known as Vitamin B8
    biotin_100g = Double()
    # also known as Vitamin B5
    pantothenic_acid_100g = Double()
    silica_100g = Double()
    bicarbonate_100g = Double()
    potassium_100g = Double()
    chloride_100g = Double()
    calcium_100g = Double()
    phosphorus_100g = Double()
    iron_100g = Double()
    magnesium_100g = Double()
    zinc_100g = Double()
    copper_100g = Double()
    manganese_100g = Double()
    fluoride_100g = Double()
    selenium_100g = Double()
    chromium_100g = Double()
    molybdenum_100g = Double()
    iodine_100g = Double()
    caffeine_100g = Double()
    taurine_100g = Double()
    # pH (no unit)
    ph_100g = Double()
    # % of fruits, vegetables and nuts (excluding potatoes, yams, manioc)
    fruits_vegetables_nuts_100g = Double()
    # carbon footprint (as indicated on the packaging of some products)
    carbon_footprint_100g = Double()
    # Nutri-Score - Nutrition score derived from the UK FSA score and adapted for the French market (formula defined by the team of Professor Hercberg)
    nutrition_score_fr_100g = Double()
    # nutrition score defined by the UK Food Standards Administration (FSA)
    nutrition_score_uk_100g = Double()
    countries_en = Keyword()
    pnns_groups_1 = Keyword()
    pnns_groups_2 = Keyword()
    states = Keyword()
    states_tags = Text(multi=True)
    states_en = Keyword()
    ecoscore_grade = Keyword()
    image_nutrition_url = Keyword()
    image_nutrition_small_url = Keyword()
    origins_en = Keyword()
    ingredients_tags = Text(multi=True)
    image_ingredients_url = Keyword()
    image_ingredients_small_url = Keyword()
    salt_100g = Double()
    fruits_vegetables_nuts_estimate_100g = Double()
    fruits_vegetables_nuts_estimate_from_ingredients_100g = Double()
    fruits_vegetables_nuts_dried_100g = Double()
    categories_en = Keyword()
    # NOTE(review): the *_score fields are declared as Keyword; if they hold numbers, range queries and
    # numeric sorting will not behave numerically — confirm this is intended.
    nutriscore_score = Keyword()
    nutriscore_grade = Keyword()
    food_groups = Keyword()
    food_groups_tags = Text(multi=True)
    food_groups_en = Keyword()
    ecoscore_score = Keyword()
    main_category_en = Keyword()
    additives_en = Keyword()
    nova_group = Keyword()
    labels_en = Keyword()
    allergens = Keyword()
    packaging_en = Keyword()
    packaging_text = Keyword()
    serving_quantity = Keyword()
    carbon_footprint_from_meat_or_fish_100g = Double()
    energy_from_fat_100g = Double()
    folates_100g = Double()
    soluble_fiber_100g = Double()
    insoluble_fiber_100g = Double()
    phylloquinone_100g = Double()
    cocoa_100g = Double()
    choline_100g = Double()
    inositol_100g = Double()
    collagen_meat_protein_ratio_100g = Double()
    beta_carotene_100g = Double()
    chlorophyl_100g = Double()
    glycemic_index_100g = Double()
    water_hardness_100g = Double()
    beta_glucan_100g = Double()
    carnitine_100g = Double()
    traces_en = Keyword()
    brand_owner = Keyword()
    abbreviated_product_name = Keyword()
    allergens_en = Keyword()
Loading

0 comments on commit b82776d

Please sign in to comment.