import os
import torch
import torch.nn as nn
import torch.optim as optim
from tensordict import TensorDict
from torchrl.objectives import DQNLoss
from torchrl.envs import GymEnv, TransformedEnv, StepCounter
from torchrl.data import ListStorage, CompositeSpec, TensorDictReplayBuffer
from torchrl.modules import MLP, QValueActor
from matplotlib import pyplot as plt
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
env = GymEnv("CartPole-v1")
env = TransformedEnv(env)
env.append_transform(StepCounter())
input_shape = env.observation_spec["observation"].shape
env_specs = env.specs
num_outputs = env_specs["input_spec", "full_action_spec", "action"].space.n
action_spec = env_specs["input_spec", "full_action_spec", "action"]
def make_model():
    return MLP(
        in_features=input_shape[-1],
        activation_class=nn.Tanh,
        out_features=num_outputs,
        num_cells=[120, 84],
    )
model = make_model()
actor = QValueActor(
    module=model,
    spec=CompositeSpec(action=action_spec),
    in_keys=["observation"],
)
loss = DQNLoss(
    value_network=actor,
    delay_value=True,
)
optimizer = optim.Adam(params=actor.parameters(), lr=1e-3)
EPOCHS = 1000
UPDATE_TIMES = 3
BATCH_SIZE = 5
MIN_HONORABLE_STEPS = 100
replay_buffer = TensorDictReplayBuffer(storage=ListStorage(10000), batch_size=BATCH_SIZE)
step_counts = []
honorable_models = []
for i in range(EPOCHS):
    state = env.reset()
    finished = state["done"]
    while not finished.item():
        # Calling the actor with a raw tensor returns the out_keys as a tuple; act[0] is the action.
        act = actor(state["observation"])
        action = TensorDict(
            {"action": act[0], "step_count": state["step_count"]}, batch_size=[]
        )
        result = env.step(action)
        next_state = result["next"]
        finished = next_state["done"]
        if finished.item():
            # Penalize the terminal transition.
            next_state["reward"] = torch.tensor([-100.0])
        # DQNLoss needs the root observation alongside the "next" entry.
        result["observation"] = state["observation"]
        state = next_state
        replay_buffer.add(result)
    actor.train()
    for _ in range(UPDATE_TIMES):
        samples = replay_buffer.sample()
        loss_t = loss(samples)
        q_loss = loss_t["loss"]
        optimizer.zero_grad()
        q_loss.backward()
        optimizer.step()
    step_counts.append(state["step_count"])
    print(i, state["step_count"])
    if state["step_count"] >= MIN_HONORABLE_STEPS:
        # Keep a snapshot of the current weights whenever the episode was long enough.
        copied_model = QValueActor(
            make_model(),
            spec=CompositeSpec(action=action_spec),
            in_keys=["observation"],
        )
        copied_model.load_state_dict(actor.state_dict())
        honorable_models.append(copied_model)
    if i % 10 == 0:
        UPDATE_TIMES += 1
plt.plot([i for i in range(len(step_counts))], step_counts)
plt.show(block=False)
plt.pause(10)
env = GymEnv("CartPole-v1", render_mode="human")
env = TransformedEnv(env)
env.append_transform(StepCounter())
for saved_actor in honorable_models:
    state = env.reset()
    finished = state["done"]
    while not finished.item():
        # Use the saved snapshot rather than the live actor.
        act = saved_actor(state["observation"])
        action = TensorDict(
            {"action": act[0], "step_count": state["step_count"]}, batch_size=[]
        )
        result = env.step(action)
        next_state = result["next"]
        finished = next_state["done"]
        if finished.item():
            next_state["reward"] = torch.tensor([-100.0])
        result["observation"] = state["observation"]
        state = next_state
    print(state["step_count"])
Error message:
C:\Users\MSI\anaconda3\envs\cs_ar\Lib\site-packages\torchrl\envs\common.py:2989: DeprecationWarning: Your wrapper was not given a device. Currently, this value will default to 'cpu'. From v0.5 it will default to None. With a device of None, no device casting is performed and the resulting tensordicts are deviceless. Please set your device accordingly.
warnings.warn(
C:\Users\MSI\anaconda3\envs\cs_ar\Lib\site-packages\pygame\pkgdata.py:25: DeprecationWarning: pkg_resources is deprecated as an API. See Package Discovery and Resource Access using pkg_resources - setuptools 69.5.1.post20240425 documentation
from pkg_resources import resource_stream, resource_exists
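For reference, the first warning only concerns the default device and, following its own advice, can presumably be silenced by passing a device explicitly when the environment is created. A minimal sketch, assuming everything runs on CPU:

env = GymEnv("CartPole-v1", device="cpu")  # explicit device, so no deprecation fallback
env = TransformedEnv(env)
env.append_transform(StepCounter())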
After all the training finishes, the graph is not displayed and instead the error messages shown above appear. What is the problem, and what can I do to fix it?