Skip to content

Latest commit

 

History

History
84 lines (65 loc) · 2.43 KB

File metadata and controls

84 lines (65 loc) · 2.43 KB

Code 5

import numpy as np
import math

def reward_function(params):
    # Constants
    MIN_REWARD = 1e-3
    MAX_REWARD = 1.0
    WAYPOINT_REWARD = 0.5
    CENTER_PENALTY = 0.2
    DIRECTION_PENALTY = 0.3
    SPEED_THRESHOLD = 1.0
    SPEED_PENALTY = 0.1
    PROGRESS_REWARD = 0.3
    STEERING_THRESHOLD = 15.0
    SMOOTHNESS_REWARD = 0.2
    ACCELERATION_REWARD = 0.1
    TURNING_REWARD = 0.1

    # Extract relevant data
    distance_from_center = params['distance_from_center']
    track_width = params['track_width']
    is_left_of_center = params['is_left_of_center']
    heading = params['heading']
    progress = params['progress']
    speed = params['speed']
    steering = abs(params['steering_angle'])

    # Initialize reward
    reward = MIN_REWARD

    # Penalty for being away from the center
    reward -= CENTER_PENALTY * (distance_from_center / (0.5 * track_width))

    # Direction penalty
    if is_left_of_center:
        angle_from_center = abs(heading - 90)
    else:
        angle_from_center = abs(heading + 270)
    reward -= DIRECTION_PENALTY * (angle_from_center / 90.0)

    # Penalty for speed deviation
    if speed < SPEED_THRESHOLD:
        reward -= SPEED_PENALTY * (SPEED_THRESHOLD - speed)

    # Reward for making progress
    reward += PROGRESS_REWARD * progress

    # Penalty for excessive steering
    if steering > STEERING_THRESHOLD:
        reward -= (steering - STEERING_THRESHOLD) / 90.0

    # Reward for smooth steering
    if steering < 10:
        reward += SMOOTHNESS_REWARD

    # Reward for acceleration
    if 'last_speed' in params:
        acceleration = speed - params['last_speed']
        if acceleration > 0:
            reward += ACCELERATION_REWARD

    # Reward for turning
    if 'last_heading' in params:
        heading_change = abs(heading - params['last_heading'])
        if heading_change > 5:
            reward += TURNING_REWARD

    # Cap reward to maximum
    reward = max(MIN_REWARD, min(MAX_REWARD, reward))

    return float(reward)

This version of the reward function includes additional features such as rewards for acceleration and turning:

  • Acceleration Reward: Provides a reward if the car is accelerating.

  • Turning Reward: Provides a reward if the car is turning.

These additional features make the reward function more complex and allow for finer control over the behavior of the DeepRacer agent. Adjustments to the constants can further fine-tune the behavior of the agent.