-
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from openfoodfacts/master
Initial service commit
- Loading branch information
Showing
21 changed files
with
1,094 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Password for the 'elastic' user (at least 6 characters) | ||
# This needs to be set in the environment variables | ||
# ELASTIC_PASSWORD= | ||
|
||
# Version of Elastic products | ||
STACK_VERSION=8.3.3 | ||
|
||
# Set the cluster name | ||
CLUSTER_NAME=docker-cluster | ||
|
||
# Set to 'basic' or 'trial' to automatically start the 30-day trial | ||
LICENSE=basic | ||
|
||
# Port to expose Elasticsearch HTTP API to the host | ||
ES_PORT=9200 | ||
#ES_PORT=127.0.0.1:9200 | ||
|
||
# Increase or decrease based on the available host memory (in bytes) | ||
# 1GB works well, 2GB and above leads to lower latency | ||
MEM_LIMIT=2147483648 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Instructions from https://fastapi.tiangolo.com/deployment/docker/ | ||
FROM python:3.9 | ||
WORKDIR /code | ||
COPY ./requirements.txt /code/requirements.txt | ||
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt | ||
COPY ./app /code/app | ||
CMD ["uvicorn", "app.api:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,55 @@ | ||
# openfoodfacts-search | ||
Open Food Facts Search API V3 using ElasticSearch - https://wiki.openfoodfacts.org/Search_API_V3 | ||
|
||
This API is currently in development. It is not serving any production traffic. The [Work Plan](https://wiki.openfoodfacts.org/Search_API_V3#Work_Plan) will be updated as development continues. | ||
|
||
### Organization | ||
The main file is `app/api.py`, and the Product schema is in `app/models/product.py`. | ||
|
||
The `scripts/` directory contains various scripts for manual validation, constructing the product schema, importing, etc. | ||
|
||
### Running locally | ||
Firstly, make sure your environment is configured: | ||
```commandline | ||
export ELASTIC_PASSWORD=PASSWORD_HERE | ||
``` | ||
|
||
Then start docker: | ||
```console | ||
docker-compose up -d | ||
``` | ||
|
||
Docker spins up: | ||
- Two elasticsearch nodes | ||
- [Elasticvue](https://elasticvue.com/) | ||
- The search service on port 8000 | ||
|
||
You will then need to import from CSV (see instructions below). | ||
|
||
### Development | ||
For development, you have two options for running the service: | ||
1. Docker | ||
2. Locally | ||
|
||
To develop on Docker, make the changes you need, then rebuild the image and restart the containers by running: | ||
```console | ||
docker build -t off_search_image . | ||
docker-compose up -d | ||
``` | ||
|
||
However, this tends to be slower than developing locally. | ||
|
||
To develop locally, create a venv, install dependencies, then run the service: | ||
```console | ||
virtualenv venv | ||
source venv/bin/activate | ||
pip install -r requirements.txt | ||
uvicorn app.api:app --reload --port=8001 | ||
``` | ||
Note that it's important to use port 8001, as port 8000 will be used by the docker version of the search service. | ||
|
||
|
||
### Helpful commands: | ||
To import data from the [CSV export](https://world.openfoodfacts.org/data): | ||
```console | ||
python scripts/perform_import.py --filename=/path/to/file.csv |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
from elasticsearch_dsl import Q | ||
from fastapi import FastAPI, HTTPException | ||
|
||
from app.models.product import Product | ||
from app.models.request import AutocompleteRequest, SearchRequest | ||
from app.utils import connection, constants, response | ||
|
||
# ASGI application object; the route handlers in this module are registered on it.
app = FastAPI()
# Runs once at import time. Presumably this sets up the Elasticsearch connection
# that Product.search() relies on -- TODO confirm in app/utils/connection.py.
connection.get_connection()
|
||
|
||
# TODO: Remove this commented out code, so that it's not confusing about where the current GET API is served | ||
# (retaining temporarily as a proof of concept) | ||
# @app.get("/{barcode}") | ||
# def get_product(barcode: str): | ||
# results = Product.search().query("match", code=barcode).execute() | ||
# results_dict = [r.to_dict() for r in results] | ||
# | ||
# if not results_dict: | ||
# raise HTTPException(status_code=404, detail="Barcode not found") | ||
# | ||
# product = results_dict[0] | ||
# return product | ||
|
||
@app.post("/autocomplete")
def autocomplete(request: AutocompleteRequest):
    """
    Run an autocomplete query against the Product index.

    When the request names no search fields, ``constants.AUTOCOMPLETE_FIELDS``
    is stored on the request and used instead. Each requested field must be a
    member of ``constants.AUTOCOMPLETE_FIELDS``; otherwise an HTTP 400 is
    raised. One ``match`` query on ``request.text`` is built per field and the
    queries are combined as the ``should`` clauses of a ``bool`` query, with
    the result size taken from ``request.get_num_results()``.

    Returns whatever ``response.create_response`` builds from the search
    results and the request.
    """
    # TODO: This function needs unit testing
    allowed_fields = constants.AUTOCOMPLETE_FIELDS

    # Fall back to the full set of autocomplete fields. The default is written
    # back onto the request because the request is passed on to
    # response.create_response below.
    if not request.search_fields:
        request.search_fields = allowed_fields

    # Reject the request on the first field that is not allowed.
    for name in request.search_fields:
        if name not in allowed_fields:
            raise HTTPException(status_code=400, detail="Invalid field: {}".format(name))

    should_clauses = [Q('match', **{name: request.text}) for name in request.search_fields]

    search_obj = Product.search().query('bool', should=should_clauses)
    search_obj = search_obj.extra(size=request.get_num_results())
    hits = search_obj.execute()

    return response.create_response(hits, request)
|
||
|
||
@app.post("/search")
def search(request: SearchRequest):
    """Placeholder for the search endpoint; it always raises NotImplementedError."""
    raise NotImplementedError
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,241 @@ | ||
from elasticsearch_dsl import Document, Date, Double, Keyword, Text, Integer | ||
|
||
from app.utils import constants | ||
from app.utils.analyzers import autocomplete | ||
|
||
|
||
class Product(Document):
    """
    Elasticsearch document mapping for a product.

    This should mirror the fields listed here:
    https://github.com/openfoodfacts/openfoodfacts-server/blob/main/html/data/data-fields.txt

    Use scripts/generate_product_from_data_fields.py to regenerate this class from data-fields.txt, but be careful
    to keep any manual adjustments that were made here.
    """

    # Index-level configuration: the index name comes from constants.INDEX_ALIAS and the settings request 4 shards.
    class Index:
        name = constants.INDEX_ALIAS
        settings = {
            "number_of_shards": 4,
        }

    # barcode of the product (can be EAN-13 or internal codes for some food stores), for products without a barcode, Open Food Facts assigns a number starting with the 200 reserved prefix
    code = Keyword()
    # url of the product page on Open Food Facts
    url = Keyword()
    # contributor who first added the product
    creator = Keyword()
    # date that the product was added (UNIX timestamp format)
    created_t = Integer()
    # date that the product was added (iso8601 format: yyyy-mm-ddThh:mn:ssZ)
    created_datetime = Date()
    # date that the product page was last modified
    last_modified_t = Integer()
    last_modified_datetime = Date()
    # name of the product
    # (snowball-analyzed text, with an `autocomplete` subfield using the custom autocomplete analyzer and a `raw`
    # keyword subfield; `brands` and `categories` below use the same layout)
    product_name = Text(analyzer='snowball', fields={'autocomplete': Text(analyzer=autocomplete), 'raw': Keyword()})
    generic_name = Keyword()
    # quantity and unit
    quantity = Keyword()
    # shape, material
    packaging = Keyword()
    # the `*_tags` fields are declared with multi=True, i.e. they hold a list of values
    packaging_tags = Text(multi=True)
    brands = Text(analyzer='snowball', fields={'autocomplete': Text(analyzer=autocomplete), 'raw': Keyword()})
    brands_tags = Text(multi=True)
    categories = Text(analyzer='snowball', fields={'autocomplete': Text(analyzer=autocomplete), 'raw': Keyword()})
    categories_tags = Text(multi=True)
    categories_fr = Keyword()
    # origins of ingredients
    origins = Keyword()
    origins_tags = Text(multi=True)
    # places where manufactured or transformed
    manufacturing_places = Keyword()
    manufacturing_places_tags = Text(multi=True)
    labels = Keyword()
    labels_tags = Text(multi=True)
    labels_fr = Keyword()
    emb_codes = Keyword()
    emb_codes_tags = Text(multi=True)
    # coordinates corresponding to the first packaging code indicated
    first_packaging_code_geo = Keyword()
    cities = Keyword()
    cities_tags = Text(multi=True)
    purchase_places = Keyword()
    stores = Keyword()
    # list of countries where the product is sold
    countries = Keyword()
    countries_tags = Text(multi=True)
    countries_fr = Keyword()
    ingredients_text = Keyword()
    traces = Keyword()
    traces_tags = Text(multi=True)
    # serving size in g
    serving_size = Keyword()
    # indicates if the nutrition facts are indicated on the food label
    no_nutriments = Keyword()
    # number of food additives
    additives_n = Keyword()
    additives = Keyword()
    additives_tags = Text(multi=True)
    ingredients_from_palm_oil_n = Keyword()
    ingredients_from_palm_oil = Keyword()
    ingredients_from_palm_oil_tags = Text(multi=True)
    ingredients_that_may_be_from_palm_oil_n = Keyword()
    ingredients_that_may_be_from_palm_oil = Keyword()
    ingredients_that_may_be_from_palm_oil_tags = Text(multi=True)
    # nutrition grade ('a' to 'e'). see https://fr.openfoodfacts.org/nutriscore
    nutrition_grade_fr = Keyword()
    main_category = Keyword()
    main_category_fr = Keyword()
    image_url = Keyword()
    image_small_url = Keyword()
    # The `*_100g` fields are all mapped as doubles.
    # NOTE(review): the suffix presumably means "per 100 g of product" -- confirm against data-fields.txt.
    energy_100g = Double()
    energy_kj_100g = Double()
    energy_kcal_100g = Double()
    proteins_100g = Double()
    casein_100g = Double()
    serum_proteins_100g = Double()
    nucleotides_100g = Double()
    carbohydrates_100g = Double()
    sugars_100g = Double()
    sucrose_100g = Double()
    glucose_100g = Double()
    fructose_100g = Double()
    lactose_100g = Double()
    maltose_100g = Double()
    maltodextrins_100g = Double()
    starch_100g = Double()
    polyols_100g = Double()
    fat_100g = Double()
    saturated_fat_100g = Double()
    butyric_acid_100g = Double()
    caproic_acid_100g = Double()
    caprylic_acid_100g = Double()
    capric_acid_100g = Double()
    lauric_acid_100g = Double()
    myristic_acid_100g = Double()
    palmitic_acid_100g = Double()
    stearic_acid_100g = Double()
    arachidic_acid_100g = Double()
    behenic_acid_100g = Double()
    lignoceric_acid_100g = Double()
    cerotic_acid_100g = Double()
    montanic_acid_100g = Double()
    melissic_acid_100g = Double()
    monounsaturated_fat_100g = Double()
    polyunsaturated_fat_100g = Double()
    omega_3_fat_100g = Double()
    alpha_linolenic_acid_100g = Double()
    eicosapentaenoic_acid_100g = Double()
    docosahexaenoic_acid_100g = Double()
    omega_6_fat_100g = Double()
    linoleic_acid_100g = Double()
    arachidonic_acid_100g = Double()
    gamma_linolenic_acid_100g = Double()
    dihomo_gamma_linolenic_acid_100g = Double()
    omega_9_fat_100g = Double()
    oleic_acid_100g = Double()
    elaidic_acid_100g = Double()
    gondoic_acid_100g = Double()
    mead_acid_100g = Double()
    erucic_acid_100g = Double()
    nervonic_acid_100g = Double()
    trans_fat_100g = Double()
    cholesterol_100g = Double()
    fiber_100g = Double()
    sodium_100g = Double()
    # % vol of alcohol
    alcohol_100g = Double()
    vitamin_a_100g = Double()
    vitamin_d_100g = Double()
    vitamin_e_100g = Double()
    vitamin_k_100g = Double()
    vitamin_c_100g = Double()
    vitamin_b1_100g = Double()
    vitamin_b2_100g = Double()
    vitamin_pp_100g = Double()
    vitamin_b6_100g = Double()
    vitamin_b9_100g = Double()
    vitamin_b12_100g = Double()
    # also known as Vitamin B8
    biotin_100g = Double()
    # also known as Vitamin B5
    pantothenic_acid_100g = Double()
    silica_100g = Double()
    bicarbonate_100g = Double()
    potassium_100g = Double()
    chloride_100g = Double()
    calcium_100g = Double()
    phosphorus_100g = Double()
    iron_100g = Double()
    magnesium_100g = Double()
    zinc_100g = Double()
    copper_100g = Double()
    manganese_100g = Double()
    fluoride_100g = Double()
    selenium_100g = Double()
    chromium_100g = Double()
    molybdenum_100g = Double()
    iodine_100g = Double()
    caffeine_100g = Double()
    taurine_100g = Double()
    # pH (no unit)
    ph_100g = Double()
    # % of fruits, vegetables and nuts (excluding potatoes, yams, manioc)
    fruits_vegetables_nuts_100g = Double()
    # carbon footprint (as indicated on the packaging of some products)
    carbon_footprint_100g = Double()
    # Nutri-Score - Nutrition score derived from the UK FSA score and adapted for the French market (formula defined by the team of Professor Hercberg)
    nutrition_score_fr_100g = Double()
    # nutrition score defined by the UK Food Standards Administration (FSA)
    nutrition_score_uk_100g = Double()
    countries_en = Keyword()
    pnns_groups_1 = Keyword()
    pnns_groups_2 = Keyword()
    states = Keyword()
    states_tags = Text(multi=True)
    states_en = Keyword()
    ecoscore_grade = Keyword()
    image_nutrition_url = Keyword()
    image_nutrition_small_url = Keyword()
    origins_en = Keyword()
    ingredients_tags = Text(multi=True)
    image_ingredients_url = Keyword()
    image_ingredients_small_url = Keyword()
    salt_100g = Double()
    fruits_vegetables_nuts_estimate_100g = Double()
    fruits_vegetables_nuts_estimate_from_ingredients_100g = Double()
    fruits_vegetables_nuts_dried_100g = Double()
    categories_en = Keyword()
    nutriscore_score = Keyword()
    nutriscore_grade = Keyword()
    food_groups = Keyword()
    food_groups_tags = Text(multi=True)
    food_groups_en = Keyword()
    ecoscore_score = Keyword()
    main_category_en = Keyword()
    additives_en = Keyword()
    nova_group = Keyword()
    labels_en = Keyword()
    allergens = Keyword()
    packaging_en = Keyword()
    packaging_text = Keyword()
    serving_quantity = Keyword()
    carbon_footprint_from_meat_or_fish_100g = Double()
    energy_from_fat_100g = Double()
    folates_100g = Double()
    soluble_fiber_100g = Double()
    insoluble_fiber_100g = Double()
    phylloquinone_100g = Double()
    cocoa_100g = Double()
    choline_100g = Double()
    inositol_100g = Double()
    collagen_meat_protein_ratio_100g = Double()
    beta_carotene_100g = Double()
    chlorophyl_100g = Double()
    glycemic_index_100g = Double()
    water_hardness_100g = Double()
    beta_glucan_100g = Double()
    carnitine_100g = Double()
    traces_en = Keyword()
    brand_owner = Keyword()
    abbreviated_product_name = Keyword()
    allergens_en = Keyword()
Oops, something went wrong.