Skip to content

Commit

Permalink
update
Browse files: browse the repository at this point in the history
  • Loading branch information
bob7783 committed Jul 23, 2024
1 parent 3aaa09b commit bf9388a
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 14 deletions.
13 changes: 6 additions & 7 deletions rl2/mountaincar/n_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,6 @@
import q_learning
from q_learning import plot_cost_to_go, FeatureTransformer, Model, plot_running_avg

- gym_minor_version = int(gym.__version__.split('.')[1])
- if gym_minor_version >= 19:
- exit("Please install OpenAI Gym 0.19.0 or earlier")
-

class SGDRegressor:
def __init__(self, **kwargs):
Expand Down Expand Up @@ -58,7 +54,7 @@ def predict(self, X):

# returns a list of states_and_rewards, and the total reward
def play_one(model, eps, gamma, n=5):
- observation = env.reset()
+ observation = env.reset()[0]
done = False
totalreward = 0
rewards = []
Expand All @@ -77,15 +73,18 @@ def play_one(model, eps, gamma, n=5):
actions.append(action)

prev_observation = observation
- observation, reward, done, info = env.step(action)
+ observation, reward, done, truncated, info = env.step(action)

rewards.append(reward)

# update the model
if len(rewards) >= n:
# return_up_to_prediction = calculate_return_before_prediction(rewards, gamma)
return_up_to_prediction = multiplier.dot(rewards[-n:])
- G = return_up_to_prediction + (gamma**n)*np.max(model.predict(observation)[0])
+ action_values = model.predict(observation)[0]
+ # print("action_values.shape:", action_values.shape)
+ G = return_up_to_prediction + (gamma**n)*np.max(action_values)
+ # print("G:", G)
model.update(states[-n], actions[-n], G)

# if len(rewards) > n:
Expand Down
10 changes: 3 additions & 7 deletions rl2/mountaincar/q_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDRegressor

- gym_minor_version = int(gym.__version__.split('.')[1])
- if gym_minor_version >= 19:
- exit("Please install OpenAI Gym 0.19.0 or earlier")
-

# SGDRegressor defaults:
# loss='squared_loss', penalty='l2', alpha=0.0001,
Expand Down Expand Up @@ -74,7 +70,7 @@ def __init__(self, env, feature_transformer, learning_rate):
self.feature_transformer = feature_transformer
for i in range(env.action_space.n):
model = SGDRegressor(learning_rate=learning_rate)
- model.partial_fit(feature_transformer.transform( [env.reset()] ), [0])
+ model.partial_fit(feature_transformer.transform( [env.reset()[0]] ), [0])
self.models.append(model)

def predict(self, s):
Expand Down Expand Up @@ -103,14 +99,14 @@ def sample_action(self, s, eps):

# returns a list of states_and_rewards, and the total reward
def play_one(model, env, eps, gamma):
- observation = env.reset()
+ observation = env.reset()[0]
done = False
totalreward = 0
iters = 0
while not done and iters < 10000:
action = model.sample_action(observation, eps)
prev_observation = observation
- observation, reward, done, info = env.step(action)
+ observation, reward, done, truncated, info = env.step(action)

# update the model
if done:
Expand Down

0 comments on commit bf9388a

Please sign in to comment.