Skip to content

Commit

Permalink
feat: Adding leaderboard for multilingual (#241)
Browse files Browse the repository at this point in the history
* feat: adding leaderboard for multilingual

* feat: multi languages

Co-authored-by: Rafael Mosquera <rafael.mosquera@factored.ai>

* style: changing names

* style: prettier

Co-authored-by: Rafael Mosquera <rafael.mosquera@factored.ai>
  • Loading branch information
Ciroye and remg1997 authored Feb 9, 2024
1 parent 5345034 commit 9c6fc37
Show file tree
Hide file tree
Showing 31 changed files with 933 additions and 359 deletions.
5 changes: 5 additions & 0 deletions backend/app/api/endpoints/base/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,8 @@ def download_model_results(
media_type="application/json",
headers={"Content-Disposition": 'attachment; filename="data.json"'},
)


@router.get("/get_dynalab_model/{task_code}")
def get_dynalab_model(task_code: str):
    # Route handler: forwards the ``task_code`` path parameter to
    # ModelService.get_dynalab_model and returns its result unchanged.
    model_service = ModelService()
    return model_service.get_dynalab_model(task_code)
5 changes: 5 additions & 0 deletions backend/app/api/endpoints/base/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,8 @@ async def authenticate(model: LoginRequest):
@router.post("/create_user")
async def create_user(model: CreateUserRequest):
    # Route handler: passes the request's email, password and username
    # (in that positional order) to LoginService.create_user and returns
    # whatever that call returns.
    login_service = LoginService()
    return login_service.create_user(model.email, model.password, model.username)


@router.get("/download_users_info", response_model={})
async def download_users_info():
    # Route handler: returns the result of UserService.download_users_info.
    # NOTE(review): no authentication dependency is visible on this route in
    # this view -- confirm access to user data is restricted elsewhere.
    user_service = UserService()
    return user_service.download_users_info()
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ async def initialize_model_evaluation(
model.s3_url,
model.model_id,
model.user_id,
model.selected_langs,
)
return {"response": "The model will be evaluated in the background"}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import Optional

from pydantic import BaseModel


Expand All @@ -10,6 +12,7 @@ class InitializeModelEvaluationRequest(BaseModel):
s3_url: str
model_id: int
user_id: int
selected_langs: Optional[str] = None


class EvaluateDownstreamTaskRequest(BaseModel):
Expand Down
6 changes: 6 additions & 0 deletions backend/app/domain/services/base/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ def upload_model_to_s3_and_evaluate(
"s3_url": model_path,
"model_id": model["id"],
"user_id": user_id,
"selected_langs": languages,
},
)
self.email_helper.send(
Expand Down Expand Up @@ -489,3 +490,8 @@ def amount_of_models_uploaded_in_hr_diff(self, task_id: int, user_id: int):
)
)
return amount_of_models_uploaded_in_hr_diff

def get_dynalab_model(self, task_code: str):
    """Build the download URL of the base dynalab archive for ``task_code``.

    The link has the form
    ``<bucket>/<task_code>/dynalab-base-<task_code>.zip``. Nothing is
    fetched or validated here; the string is only assembled and returned.
    """
    return (
        "https://models-dynalab.s3.eu-west-3.amazonaws.com"
        f"/{task_code}/dynalab-base-{task_code}.zip"
    )
3 changes: 3 additions & 0 deletions backend/app/domain/services/base/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,6 @@ def get_stats_by_user_id(self, user_id: int):
"model_fooling_rate"
] = self.example_repository.get_model_fooling_rate_by_user_id(user_id)
return stats_by_user

def download_users_info(self):
    """Return whatever ``self.user_repository.download_users_info()`` yields."""
    repository = self.user_repository
    return repository.download_users_info()
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,13 @@ def evaluate(metric: str, formatted_predictions: list, formatted_labels: list) -
return score_obj


def evaluation_without_tags(metric: str, predictions: list, labels: list) -> dict:
predictions, labels = format_data_for_evaluation(predictions, labels, tags=False)
def evaluation_without_tags(
metric: str, predictions: list, labels: list, multilingual: bool = False
) -> dict:
if not multilingual:
predictions, labels = format_data_for_evaluation(
predictions, labels, tags=False
)
perf, perf_dict = _compute_metric(metric, predictions, labels)
score_obj = {}
score_obj["perf"] = perf
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,22 @@ def group_labels(examples: list) -> dict:
return final_labels


def neccesary_format_for_evaluation(prediction_dict: dict, label: str):
def necessary_format_for_multilingual_evaluation(
    prediction_dict: dict, label: str, gender: str
):
    """Collect flat label and prediction lists for a multilingual evaluation.

    ``prediction_dict["dataset"]`` and ``prediction_dict["predictions"]`` are
    each handed to ``load_dataset``; every entry of the former is indexed
    with ``gender`` and every entry of the latter with ``label``.

    Returns a tuple ``(formatted, False)`` where ``formatted`` maps
    ``"formatted_base_predictions"`` and ``"formatted_base_dataset"`` to the
    two lists. The second element is always ``False``.
    """
    # The dataset is loaded before the predictions, as in the original order.
    dataset_values = [
        entry[gender] for entry in load_dataset(prediction_dict["dataset"])
    ]
    prediction_values = [
        entry[label] for entry in load_dataset(prediction_dict["predictions"])
    ]
    formatted = {
        "formatted_base_predictions": prediction_values,
        "formatted_base_dataset": dataset_values,
    }
    return formatted, False


def necessary_format_for_evaluation(prediction_dict: dict, label: str):
data_dict = {}
for data_version, data_types in prediction_dict.items():
for data_type in data_types:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,15 @@ def get_dataperf_balanced_accuracy_meta(task=None):
}


def get_chrf_meta(task=None):
    """Return the display metadata for the ``chrf`` metric.

    ``task`` is accepted but not used.
    """
    return {
        # NOTE(review): confirm "%" is the intended unit for this score.
        "unit": "%",
        # Label shown for this metric; it must name chrF, not another metric.
        "pretty_name": "chrF",
        "utility_direction": 1,
        "offset": 0,
    }


def get_chrf_pp(predictions: list, targets: list):
"""Chrf++ metric.
Expand Down Expand Up @@ -328,6 +337,11 @@ def get_bleu(predictions: list, targets: list):
return bleu.score


def get_chrf(predictions: list, targets: list):
    """Return the ``score`` of ``sacrebleu.corpus_chrf`` for the given data.

    ``targets`` is wrapped in a one-element list before being passed as the
    references argument.
    """
    references = [targets]
    return sacrebleu.corpus_chrf(predictions, references).score


def get_bleu_meta(task=None):
    """Return the display metadata for the BLEU metric (``task`` is unused)."""
    meta = {
        "unit": "",
        "pretty_name": "BLEU",
        "utility_direction": 1,
        "offset": 0,
    }
    return meta

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
get_accuracy_meta,
get_bleu,
get_bleu_meta,
get_chrf,
get_chrf_meta,
get_chrf_pp_meta,
get_dataperf_auc,
get_dataperf_auc_meta,
Expand Down Expand Up @@ -59,6 +61,7 @@
"dataperf_balanced_accuracy": get_dataperf_balanced_accuracy,
"new_accuracy": get_new_accuracy,
"matthews_correlation": get_matthews_correlation,
"chrf": get_chrf,
}

delta_metrics_dict = {
Expand Down Expand Up @@ -91,4 +94,5 @@
"dataperf_auc": get_dataperf_auc_meta,
"dataperf_fraction_of_fixes": get_dataperf_fraction_of_fixes_meta,
"dataperf_balanced_accuracy": get_dataperf_balanced_accuracy_meta,
"chrf": get_chrf_meta,
}
Loading

0 comments on commit 9c6fc37

Please sign in to comment.