# docs/examples/qa_task_config.yaml

# Aggregation metric for the task; `type` names the metric (dynascore).
aggregation_metric:
  type: 'dynascore'
# Goal statement, declared as plain text.
# NOTE(review): the wording overlaps with the `goal_message` key in this
# file — confirm which of the two the consumer actually reads.
goal:
  type: 'plain-text'
  text: >-
    Your goal: enter a question and select an answer in the context, such that
    the model is fooled.
# Context definition of type `selectable-text`.
# `field_names_for_the_model` pairs `context` with `context` and `answer`
# with `label`.
# NOTE(review): presumably the selected span is the `answer` — confirm
# against the task-config schema.
context:
  type: 'selectable-text'
  field_names_for_the_model:
    context: 'context'
    answer: 'label'
# Inputs typed by the user: a single text field that declares `question`
# as its field name for the model.
user_input:
- type: 'text'
  # The placeholder wording matches the `question` field this input fills.
  placeholder: 'Enter a question'
  field_name_for_the_model: 'question'
# Model input mapping: `question` to `question`, `context` to `context`.
# NOTE(review): which side is the model's argument name and which is the
# task field is not visible here — confirm against the schema.
model_input:
  question: 'question'
  context: 'context'
# Response fields: `input_by_user` points at the `question` field declared
# under `user_input`.
response_fields:
  input_by_user: 'question'
# Delta metrics listed for the task, in order: fairness, robustness.
delta_metrics:
- type: 'fairness'
- type: 'robustness'
# Goal message: same wording as `goal.text` without the "Your goal: " prefix.
# Written as a folded scalar so the line break is explicit; the value is a
# single line with no trailing newline.
goal_message: >-
  enter a question and select an answer in the context, such that the
  model is fooled.
# Extra metadata fields, in two groups: `create` and `validate`.
# NOTE(review): presumably `create` fields are collected when an example is
# written and `validate` fields when it is reviewed — confirm against the
# task-config schema.
metadata:
  create:
  # Explanation of the example itself; carries no `model_wrong_condition`.
  - name: 'example_explanation'
    display_name: 'example explanation'
    type: 'string'
    placeholder: 'Explain why your example is correct...'
  # The next two fields share the display name "model explanation"; they
  # differ in `model_wrong_condition` (false vs. true) and in placeholder.
  - name: 'model_explanation_right'
    display_name: 'model explanation'
    type: 'string'
    placeholder: 'Explain why you thought the model would make a mistake...'
    model_wrong_condition: false
  - name: 'model_explanation_wrong'
    display_name: 'model explanation'
    type: 'string'
    placeholder: 'Explain why you think the model made a mistake...'
    model_wrong_condition: true
  validate:
  # Fields conditioned on the `incorrect` label.
  - name: 'corrected_answer'
    type: 'context_string_selection'
    reference_name: 'context'
    validated_label_condition: 'incorrect'
  - name: 'target_explanation'
    type: 'string'
    placeholder: 'Explain why your proposed target is correct...'
    validated_label_condition: 'incorrect'
  # Field conditioned on the `flagged` label.
  - name: 'flag_reason'
    type: 'string'
    placeholder: 'Enter the reason for flagging...'
    validated_label_condition: 'flagged'
  # Field conditioned on the `correct` label.
  - name: 'validator_example_explanation'
    type: 'string'
    placeholder: 'Explain why the example is correct...'
    validated_label_condition: 'correct'
  # This field declares no `validated_label_condition`.
  # NOTE(review): presumably it applies to every label — confirm.
  - name: 'validator_model_explanation'
    type: 'string'
    placeholder: 'Enter what you think was done to try to trick the model...'
# Model-wrong metric: `string_f1` with reference `answer`, threshold 0.4.
# NOTE(review): `model_evaluation_metric` in this file names the same
# metric and threshold — confirm whether both keys are still read.
model_wrong_metric:
  type: 'string_f1'
  reference_name: 'answer'
  threshold: 0.4
# Output fields: `answer` (no type given) and `conf`, a `prob` field with
# `single_prob` enabled.
output:
- name: 'answer'
- name: 'conf'
  type: 'prob'
  single_prob: true
# Performance metric: `squad_f1` with reference `answer`.
perf_metric:
  type: 'squad_f1'
  reference_name: 'answer'
# Model output mapping: `model_prediction_label` is set to `answer`.
model_output:
  model_prediction_label: 'answer'
# Model evaluation metric: `string_f1`, parameterized with threshold 0.4.
# NOTE(review): `model_wrong_metric` in this file names the same metric
# and threshold — confirm whether both keys are still read.
model_evaluation_metric:
  metric_name: 'string_f1'
  metric_parameters:
    threshold: 0.4