-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathparameters.yml
319 lines (305 loc) · 7.51 KB
/
parameters.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
# Environment configuration
env:
  # Random seed used to generate rails
  # (14 medium - 14 big)
  seed: 1
  # Number of trains to spawn
  # (3, 5, 7 medium - 5, 7, 10 big)
  num_trains: 7
  # Environment width
  # (48 medium - 64 big)
  width: 48
  # Environment height
  # (27 medium - 36 big)
  height: 27
  # Maximum number of cities where agents can start or end
  # (5 medium - 9 big)
  max_cities: 5
  # Type of city distribution
  grid: false
  # Maximum number of tracks allowed between cities
  # (2 medium - 5 big)
  max_rails_between_cities: 2
  # Maximum number of parallel tracks within a city
  # (3 medium - 5 big)
  max_rails_in_cities: 3
  # Enable variable speed
  variable_speed: false
  # Malfunctions config
  malfunctions:
    # Enable malfunctions
    enabled: false
    # Malfunction rate
    # (0.0125 medium - 0.0125 big)
    rate: !!float 0.005
    # Malfunction minimum duration
    min_duration: 15
    # Malfunction maximum duration
    max_duration: 50
  # Rewards config
  # NOTE(review): nested under `env` by inference — confirm against the consumer
  rewards:
    # Penalty term applied on stop actions
    stop_penalty: 2.0
# Observator config
observator:
  # Maximum depth for the observator
  max_depth: 21
  # Binary tree observation configs
  binary_tree:
    # Observation radius
    radius: 30
  # Tree observation configs
  tree:
    # Observation radius
    radius: 10
# Predictor config
predictor:
  # Maximum depth for the predictor
  # (for the shortest/deviation predictor
  # it indicates the maximum number of deviations)
  max_depth: 4
# Policy config
policy:
  # Select policy type
  # (exactly one flag should be true)
  type:
    # Graph observation policy
    graph: false
    # Decentralized FOV observation policy
    decentralized_fov: true
    # Standard tree observation policy
    tree: false
    # Binary tree observation policy
    binary_tree: false
    # Random policy
    random: false
# Action selector config
action_selector:
  # Type of action selector
  # (exactly one flag should be true)
  type:
    # Epsilon-greedy action selection
    eps_greedy: true
    # Boltzmann action selection
    boltzmann: false
    # Random action selection
    random: false
    # Greedy action selection
    greedy: false
    # Categorical action selection
    categorical: false
  # Action selection parameter decay config
  parameter_decay:
    # Type of decay
    type:
      # Linear decay (param - decay)
      linear: true
      # Exponential decay (param * decay)
      exponential: false
      # No decay
      none: false
    # Initial exploration
    start: !!float 1.0
    # Final exploration
    end: !!float 0.01
    # Percentage of episodes with parameter greater than final exploration
    decaying_episodes: !!float 0.70
# Learning config
learning:
  # Learning rate
  learning_rate: !!float 0.5e-4
  # Weight multiplier for target network soft update
  # (the !!float tag is required: PyYAML does not implicitly
  # resolve dot-less exponent forms such as 1e-3 as floats)
  tau: !!float 1e-3
  # Discount multiplier for expected Q-value of targets
  discount: !!float 0.99
  # Bellman function
  softmax_bellman:
    # Enable or disable
    enabled: true
    # Temperature parameter in the softmax
    temperature: !!float 0.5
  # Loss function
  # (exactly one flag should be true)
  loss:
    # (Masked) Huber loss
    huber: true
    # (Masked) MSE loss
    mse: false
  # Type of gradient clipping
  gradient:
    # Maximum value for the norm of the gradients
    max_norm: 10
    # Maximum symmetrical limit for the values of the gradients
    value_limit: 1
    # Clip the norm of the gradients (`max_grad_norm`)
    clip_norm: true
    # Clamp the gradient itself (-`grad_value_limit`, `grad_value_limit`)
    clamp_values: false
# Model config
model:
  # DQN configuration
  dqn:
    # Dueling DQN configurations
    dueling:
      # Enable or disable dueling DQN
      enabled: true
      # How to aggregate advantages
      aggregation:
        # Use the mean function
        mean: true
        # Use the max function
        max: false
    # Enable or disable double DQN
    double: true
    # The number of hidden layers with their hidden sizes
    hidden_sizes:
      - 128
      - 128
    # Non-linear function
    nonlinearity:
      # ReLU function
      relu: false
      # Tanh function
      tanh: true
  # Entire GNN configuration
  entire_gnn:
    # Size of the output embedding
    embedding_size: 4
    # Hidden embedding size in GNN layers
    hidden_size: 8
    # Number of embeddings to use as DQN input
    pos_size: 3
    # Dropout value
    dropout: !!float 0.2
    # Non-linear function
    nonlinearity:
      # ReLU function
      relu: true
      # Tanh function
      tanh: false
  # Multi GNN configuration
  multi_gnn:
    # CNN configuration
    cnn_encoder:
      # Conv2d settings
      conv:
        # Filter dimension
        kernel_size: 3
        # Filter stride
        stride: 1
        # Filter padding
        padding: 1
      # MaxPool2d settings
      pool:
        # Filter dimension
        kernel_size: 2
        # Filter stride
        stride: 2
        # Filter padding
        padding: 0
      # CNN encoder hidden channels number and sizes
      hidden_channels:
        - 32
        - 32
        - 64
        - 64
      # CNN encoder size of the output channel
      output_channels: 128
    # MLP configuration
    mlp_compression:
      # The number of hidden FC layers with their hidden sizes
      # (to map the encoder output to a fixed size)
      hidden_sizes: []
      # Final MLP output size
      output_size: 128
    # GNN configuration
    gnn_communication:
      # Number and hidden embedding sizes of graph convs to perform
      hidden_sizes:
        - 128
      # Size of the output embedding
      embedding_size: 128
      # Dropout value
      dropout: !!float 0.2
      # Non-linear function
      nonlinearity:
        # ReLU function
        relu: true
        # Tanh function
        tanh: false
# Replay buffer config
replay_buffer:
  # Maximum buffer dimension
  size: 100000
  # Batch size
  batch_size: 128
  # Try to learn after this many steps
  checkpoint: 4
  # Replay buffer to restore (empty string disables loading)
  load: ""
  # Save replay buffer at each checkpoint
  save: false
# Generic config
generic:
  # Number of threads PyTorch can use
  num_threads: 1
  # Fix all the possible sources of randomness
  fix_random: true
  # Random seed used when `fix_random` is true
  random_seed: 1
  # Device config
  use_gpu: false
  # Enable wandb logging
  enable_wandb: true
  # Checkpoint to save model to wandb
  wandb_checkpoint: 500
  # Gradients logging in wandb
  wandb_gradients:
    # Enable or disable logging
    enabled: false
    # How often to log gradients
    checkpoint: 200
# Training config
training:
  # Checkpoint interval (how often to evaluate and save the model)
  checkpoint: 500
  # Train environment config
  train_env:
    # Path to the train environment file (empty string disables loading)
    load: ""
    # Number of training episodes to run
    episodes: 7500
    # Train on random environments or on the same one
    all_random: true
  # Evaluation environment config
  eval_env:
    # Path to the evaluation environment file (empty string disables loading)
    load: ""
    # Number of evaluation episodes
    episodes: 20
    # Evaluate on random environments or on the same one
    all_random: true
  # Renderer config
  renderer:
    # Render episodes during training
    training: false
    # How often to render an episode in training
    train_checkpoint: 1
    # Render episodes during evaluation
    evaluation: false
    # How often to render an episode in evaluation
    eval_checkpoint: 5
    # Save frames
    save_frames: false
# Testing config
testing:
  # Number of episodes to run
  episodes: 500
  # Path to the environment file (empty string disables loading)
  load: ""
  # Path to the model to load
  model: ""
  # Verbose option
  verbose: true
  # Renderer config
  renderer:
    # Enable renderer
    enabled: true
    # Seconds to sleep between moves
    sleep: 2
    # Save intermediate renderer frames
    save_frames: true