Commit a54010a (0 parents)

init: td3 robot nav

Files changed:
- .gitattributes +5 -0
- README.md +14 -0
- animation/animation.gif +3 -0
- app.py +223 -0
- eval_world.yaml +50 -0
- models/TD3/TD3.py +277 -0
- models/TD3/checkpoint/TD3_actor.pth +3 -0
- models/TD3/checkpoint/TD3_actor_target.pth +3 -0
- models/TD3/checkpoint/TD3_critic.pth +3 -0
- models/TD3/checkpoint/TD3_critic_target.pth +3 -0
- replay_buffer.py +142 -0
- requirements.txt +5 -0
- robot_world.yaml +51 -0
- sim.py +95 -0
- train.py +142 -0
- utils.py +123 -0
.gitattributes
ADDED
@@ -0,0 +1,5 @@
animation/animation.gif filter=lfs diff=lfs merge=lfs -text
models/TD3/checkpoint/TD3_critic.pth filter=lfs diff=lfs merge=lfs -text
models/TD3/checkpoint/TD3_critic_target.pth filter=lfs diff=lfs merge=lfs -text
models/TD3/checkpoint/TD3_actor.pth filter=lfs diff=lfs merge=lfs -text
models/TD3/checkpoint/TD3_actor_target.pth filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,14 @@
---
title: TD3 Robot Nav Irsim
emoji: 🦀
colorFrom: gray
colorTo: gray
sdk: gradio
sdk_version: 5.23.1
app_file: app.py
pinned: false
license: mit
short_description: 'TD3: Reinforcement Learning for Autonomous Robot Navigation'
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
animation/animation.gif
ADDED
Git LFS Details
app.py
ADDED
@@ -0,0 +1,223 @@
from models.TD3.TD3 import TD3

import torch
import numpy as np
from sim import SIM_ENV
import yaml
import gradio as gr
import os
from pathlib import Path
import matplotlib
import random

matplotlib.use('Agg')


def generate_random_points(num_scenarios=2):
    """Generate random robot poses and goals"""
    robot_poses = []
    robot_goals = []

    for _ in range(num_scenarios):
        # Random pose: x, y, orientation, velocity
        pose = [
            [random.uniform(1, 9)],     # x position
            [random.uniform(1, 9)],     # y position
            [random.uniform(0, 3.14)],  # orientation
            [0],                        # initial velocity
        ]

        # Random goal: x, y, orientation
        goal = [
            [random.uniform(1, 9)],  # x position
            [random.uniform(1, 9)],  # y position
            [0],                     # orientation
        ]

        robot_poses.append(pose)
        robot_goals.append(goal)

    return robot_poses, robot_goals


def get_predefined_scenarios():
    """Return predefined robot poses and goals"""
    robot_poses = [
        [[3], [4], [0], [0]],
        [[8], [1], [1], [0]],
        [[2], [6], [1], [0]],
        [[7], [1], [0], [0]],
        [[7], [6.5], [2], [0]],
        [[9], [9], [3], [0]],
        [[2], [9], [1], [0]],
        [[3], [6], [3], [0]],
        [[1], [7], [0], [0]],
        [[5], [7], [3], [0]],
    ]

    robot_goals = [
        [[8], [8], [0]],
        [[2], [9], [0]],
        [[7], [1], [0]],
        [[7.2], [9], [0]],
        [[1], [1], [0]],
        [[5], [1], [0]],
        [[7], [4], [0]],
        [[9], [4], [0]],
        [[1], [9], [0]],
        [[5], [1], [0]],
    ]

    return robot_poses, robot_goals


def run_simulation():
    """Run the simulation and return the path to the generated GIF"""
    action_dim = 2  # number of actions produced by the model
    max_action = 1  # maximum absolute value of output actions
    state_dim = 25  # number of input values in the neural network (vector length of state input)
    device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu"
    )  # use CUDA if it is available, CPU otherwise
    epoch = 0  # epoch number
    max_steps = 300  # maximum number of steps in a single episode

    model = TD3(
        state_dim=state_dim,
        action_dim=action_dim,
        max_action=max_action,
        device=device,
        load_model=True,
    )  # instantiate a model

    sim = SIM_ENV(world_file="eval_world.yaml", save_ani=True)  # instantiate environment

    # Generate random evaluation points instead of loading from YAML
    # num_scenarios = random.randint(2, 5)  # random number of scenarios between 2 and 5

    # Get predefined scenarios and randomly select one
    all_poses, all_goals = get_predefined_scenarios()
    scenario_index = random.randint(0, len(all_poses) - 1)

    robot_poses = [all_poses[scenario_index]]
    robot_goals = [all_goals[scenario_index]]

    print(f"Selected scenario {scenario_index+1} of {len(all_poses)}")
    total_reward = 0.0
    total_steps = 0
    col = 0
    goals = 0
    for idx in range(len(robot_poses)):
        count = 0
        latest_scan, distance, cos, sin, collision, goal, a, reward = sim.reset(
            robot_state=robot_poses[idx],
            robot_goal=robot_goals[idx],
            random_obstacles=False,
        )
        done = False
        while not done and count < max_steps:
            state, terminal = model.prepare_state(
                latest_scan, distance, cos, sin, collision, goal, a
            )
            action = model.get_action(np.array(state), False)
            a_in = [(action[0] + 1) / 4, action[1]]
            latest_scan, distance, cos, sin, collision, goal, a, reward = sim.step(
                lin_velocity=a_in[0], ang_velocity=a_in[1]
            )
            total_reward += reward
            total_steps += 1
            count += 1
            if collision:
                col += 1
            if goal:
                goals += 1
            done = collision or goal
    avg_step_reward = total_reward / total_steps
    avg_reward = total_reward / len(robot_poses)
    avg_col = col / len(robot_poses)
    avg_goal = goals / len(robot_poses)
    print(f"Total Reward: {total_reward}")
    print(f"Average Reward: {avg_reward}")
    print(f"Average Step Reward: {avg_step_reward}")
    print(f"Average Collision rate: {avg_col}")
    print(f"Average Goal rate: {avg_goal}")
    print("..............................................")
    model.writer.add_scalar("test/total_reward", total_reward, epoch)
    model.writer.add_scalar("test/avg_reward", avg_reward, epoch)
    model.writer.add_scalar("test/avg_step_reward", avg_step_reward, epoch)
    model.writer.add_scalar("test/avg_col", avg_col, epoch)
    model.writer.add_scalar("test/avg_goal", avg_goal, epoch)

    sim.env.end(ending_time=3)

    # Find the latest generated GIF file in the animation folder
    animation_dir = Path("animation")
    if animation_dir.exists():
        gif_files = list(animation_dir.glob("*.gif"))
        if gif_files:
            # Sort by creation time (newest first)
            latest_gif = max(gif_files, key=lambda x: x.stat().st_ctime)
            return str(latest_gif), {
                "Total Reward": f"{total_reward:.2f}",
                "Average Reward": f"{avg_reward:.2f}",
                "Average Step Reward": f"{avg_step_reward:.2f}",
                "Collision Rate": f"{avg_col:.2f}",
                "Goal Rate": f"{avg_goal:.2f}",
            }

    return None, {"Error": "No GIF file was generated"}


def get_default_data():
    """Return default animation and statistics data"""
    # Find any existing GIF in the animation folder
    animation_dir = Path("animation")
    default_gif = None

    if animation_dir.exists():
        gif_files = list(animation_dir.glob("*.gif"))
        if gif_files:
            # Get the most recent GIF
            default_gif = str(max(gif_files, key=lambda x: x.stat().st_ctime))

    # Default statistics
    default_stats = {
        "Total Reward": "99.12",
        "Average Reward": "99.12",
        "Average Step Reward": "1.40",
        "Collision Rate": "0.00",
        "Goal Rate": "1.00",
    }

    return default_gif, default_stats


def main(args=None):
    """Main function with Gradio interface"""
    # Get default data for the initial display
    default_gif, default_stats = get_default_data()

    with gr.Blocks(title="Robot Navigation Simulation") as demo:
        with gr.Row():
            with gr.Column():
                run_button = gr.Button("Run Simulation", variant="primary")

        with gr.Row():
            with gr.Column():
                output_image = gr.Image(
                    type="filepath",
                    label="Simulation Animation",
                    value=default_gif,  # set default value
                )

            with gr.Column():
                output_stats = gr.JSON(
                    label="Simulation Statistics",
                    value=default_stats,  # set default value
                )

        run_button.click(
            fn=run_simulation,
            outputs=[output_image, output_stats],
        )

    demo.launch(share=False)


if __name__ == "__main__":
    main()
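A note on the action mapping in the evaluation loop above: the actor's tanh output lies in [-1, 1] per dimension, and `a_in = [(action[0] + 1) / 4, action[1]]` rescales the first component into a [0, 0.5] m/s linear velocity while passing the angular velocity through unchanged (train.py applies the same mapping). A minimal sketch of that mapping; the helper name is illustrative, not part of the repository:

# Minimal sketch of the action rescaling used in app.py and train.py.
def to_env_action(model_action):
    # model_action components are tanh outputs in [-1, 1]
    lin = (model_action[0] + 1) / 4  # linear velocity in [0, 0.5] m/s
    ang = model_action[1]            # angular velocity passed through
    return [lin, ang]

assert to_env_action([-1.0, 0.0])[0] == 0.0  # minimum linear velocity
assert to_env_action([1.0, 0.0])[0] == 0.5   # maximum linear velocity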
eval_world.yaml
ADDED
@@ -0,0 +1,50 @@
world:
  height: 10  # the height of the world
  width: 10  # the width of the world
  step_time: 0.3  # time (s) per simulation step
  sample_time: 0.3  # time (s) between render and data extraction steps
  collision_mode: 'react'

robot:
  - kinematics: {name: 'diff'}
    shape: {name: 'circle', radius: 0.2}
    vel_min: [ 0, -1.0 ]
    vel_max: [ 1.0, 1.0 ]
    state: [3, 4, 0, 0]
    goal: [9, 9, 0]
    arrive_mode: position
    goal_threshold: 0.3

    sensors:
      - type: 'lidar2d'
        range_min: 0
        range_max: 7
        angle_range: 3.14
        number: 180
        noise: True
        std: 0.08
        angle_std: 0.1
        offset: [ 0, 0, 0 ]
        alpha: 0.3

    plot:
      show_trajectory: True

obstacle:
  - shape: { name: 'circle', radius: 1.0 }  # radius
    state: [ 5, 5, 0 ]
  - shape: { name: 'circle', radius: 0.5 }  # radius
    state: [ 7, 8, 0 ]
  - shape: { name: 'circle', radius: 1.4 }  # radius
    state: [ 3, 1, 0 ]
  - shape: {name: 'rectangle', length: 1.0, width: 1.2}  # length, width
    state: [8, 5, 1]
  - shape: { name: 'rectangle', length: 0.5, width: 2.1 }  # length, width
    state: [ 1, 8, 1.3 ]
  - shape: { name: 'rectangle', length: 1.5, width: 0.7 }  # length, width
    state: [ 6, 2, 0.5 ]

  - shape: { name: 'linestring', vertices: [ [ 0, 0 ], [ 10, 0 ], [ 10, 10 ], [ 0, 10 ], [ 0, 0 ] ] }  # vertices
    kinematics: {name: 'static'}
    state: [ 0, 0, 0 ]
models/TD3/TD3.py
ADDED
@@ -0,0 +1,277 @@
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from numpy import inf
from torch.utils.tensorboard import SummaryWriter


class Actor(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(Actor, self).__init__()

        self.layer_1 = nn.Linear(state_dim, 400)
        torch.nn.init.kaiming_uniform_(self.layer_1.weight, nonlinearity="leaky_relu")
        self.layer_2 = nn.Linear(400, 300)
        torch.nn.init.kaiming_uniform_(self.layer_2.weight, nonlinearity="leaky_relu")
        self.layer_3 = nn.Linear(300, action_dim)
        self.tanh = nn.Tanh()

    def forward(self, s):
        s = F.leaky_relu(self.layer_1(s))
        s = F.leaky_relu(self.layer_2(s))
        a = self.tanh(self.layer_3(s))
        return a


class Critic(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(Critic, self).__init__()

        self.layer_1 = nn.Linear(state_dim, 400)
        torch.nn.init.kaiming_uniform_(self.layer_1.weight, nonlinearity="leaky_relu")
        self.layer_2_s = nn.Linear(400, 300)
        torch.nn.init.kaiming_uniform_(self.layer_2_s.weight, nonlinearity="leaky_relu")
        self.layer_2_a = nn.Linear(action_dim, 300)
        torch.nn.init.kaiming_uniform_(self.layer_2_a.weight, nonlinearity="leaky_relu")
        self.layer_3 = nn.Linear(300, 1)
        torch.nn.init.kaiming_uniform_(self.layer_3.weight, nonlinearity="leaky_relu")

        self.layer_4 = nn.Linear(state_dim, 400)
        torch.nn.init.kaiming_uniform_(self.layer_4.weight, nonlinearity="leaky_relu")
        self.layer_5_s = nn.Linear(400, 300)
        torch.nn.init.kaiming_uniform_(self.layer_5_s.weight, nonlinearity="leaky_relu")
        self.layer_5_a = nn.Linear(action_dim, 300)
        torch.nn.init.kaiming_uniform_(self.layer_5_a.weight, nonlinearity="leaky_relu")
        self.layer_6 = nn.Linear(300, 1)
        torch.nn.init.kaiming_uniform_(self.layer_6.weight, nonlinearity="leaky_relu")

    def forward(self, s, a):
        # Q1: combine the state and action branches manually, sharing the
        # action branch bias
        s1 = F.leaky_relu(self.layer_1(s))
        s11 = torch.mm(s1, self.layer_2_s.weight.data.t())
        s12 = torch.mm(a, self.layer_2_a.weight.data.t())
        s1 = F.leaky_relu(s11 + s12 + self.layer_2_a.bias.data)
        q1 = self.layer_3(s1)

        # Q2: same structure with independent weights
        s2 = F.leaky_relu(self.layer_4(s))
        s21 = torch.mm(s2, self.layer_5_s.weight.data.t())
        s22 = torch.mm(a, self.layer_5_a.weight.data.t())
        s2 = F.leaky_relu(s21 + s22 + self.layer_5_a.bias.data)
        q2 = self.layer_6(s2)
        return q1, q2


# TD3 network
class TD3(object):
    def __init__(
        self,
        state_dim,
        action_dim,
        max_action,
        device,
        lr=1e-4,
        save_every=0,
        load_model=False,
        save_directory=Path("models/TD3/new/checkpoint"),
        model_name="TD3",
        load_directory=Path("models/TD3/checkpoint"),
    ):
        # Initialize the Actor network
        self.device = device
        self.actor = Actor(state_dim, action_dim).to(self.device)
        self.actor_target = Actor(state_dim, action_dim).to(self.device)
        self.actor_target.load_state_dict(self.actor.state_dict())
        self.actor_optimizer = torch.optim.Adam(params=self.actor.parameters(), lr=lr)

        # Initialize the Critic networks
        self.critic = Critic(state_dim, action_dim).to(self.device)
        self.critic_target = Critic(state_dim, action_dim).to(self.device)
        self.critic_target.load_state_dict(self.critic.state_dict())
        self.critic_optimizer = torch.optim.Adam(params=self.critic.parameters(), lr=lr)

        self.action_dim = action_dim
        self.max_action = max_action
        self.state_dim = state_dim
        self.writer = SummaryWriter()
        self.iter_count = 0
        if load_model:
            self.load(filename=model_name, directory=load_directory)
        self.save_every = save_every
        self.model_name = model_name
        self.save_directory = save_directory

    def get_action(self, obs, add_noise):
        if add_noise:
            return (
                self.act(obs) + np.random.normal(0, 0.2, size=self.action_dim)
            ).clip(-self.max_action, self.max_action)
        else:
            return self.act(obs)

    def act(self, state):
        # Function to get the action from the actor
        state = torch.Tensor(state).to(self.device)
        return self.actor(state).cpu().data.numpy().flatten()

    # training cycle
    def train(
        self,
        replay_buffer,
        iterations,
        batch_size,
        discount=0.99,
        tau=0.005,
        policy_noise=0.2,
        noise_clip=0.5,
        policy_freq=2,
    ):
        av_Q = 0
        max_Q = -inf
        av_loss = 0
        for it in range(iterations):
            # Sample a batch from the replay buffer
            (
                batch_states,
                batch_actions,
                batch_rewards,
                batch_dones,
                batch_next_states,
            ) = replay_buffer.sample_batch(batch_size)
            state = torch.Tensor(batch_states).to(self.device)
            next_state = torch.Tensor(batch_next_states).to(self.device)
            action = torch.Tensor(batch_actions).to(self.device)
            reward = torch.Tensor(batch_rewards).to(self.device)
            done = torch.Tensor(batch_dones).to(self.device)

            # Obtain the estimated action for the next state from the actor-target
            next_action = self.actor_target(next_state)

            # Add noise to the action
            noise = (
                torch.Tensor(batch_actions)
                .data.normal_(0, policy_noise)
                .to(self.device)
            )
            noise = noise.clamp(-noise_clip, noise_clip)
            next_action = (next_action + noise).clamp(-self.max_action, self.max_action)

            # Calculate the Q values from the critic-target network for the next state-action pair
            target_Q1, target_Q2 = self.critic_target(next_state, next_action)

            # Select the minimal Q value from the 2 calculated values
            target_Q = torch.min(target_Q1, target_Q2)
            av_Q += torch.mean(target_Q)
            max_Q = max(max_Q, torch.max(target_Q))
            # Calculate the final Q value from the target network parameters by using the Bellman equation
            target_Q = reward + ((1 - done) * discount * target_Q).detach()

            # Get the Q values of the base networks with the current parameters
            current_Q1, current_Q2 = self.critic(state, action)

            # Calculate the loss between the current Q value and the target Q value
            loss = F.mse_loss(current_Q1, target_Q) + F.mse_loss(current_Q2, target_Q)

            # Perform the gradient descent
            self.critic_optimizer.zero_grad()
            loss.backward()
            self.critic_optimizer.step()

            if it % policy_freq == 0:
                # Maximize the actor output value by performing gradient descent on negative Q values
                # (essentially performing gradient ascent)
                actor_grad, _ = self.critic(state, self.actor(state))
                actor_grad = -actor_grad.mean()
                self.actor_optimizer.zero_grad()
                actor_grad.backward()
                self.actor_optimizer.step()

                # Use a soft update to update the actor-target network parameters by
                # infusing a small amount of the current parameters
                for param, target_param in zip(
                    self.actor.parameters(), self.actor_target.parameters()
                ):
                    target_param.data.copy_(
                        tau * param.data + (1 - tau) * target_param.data
                    )
                # Use a soft update to update the critic-target network parameters by
                # infusing a small amount of the current parameters
                for param, target_param in zip(
                    self.critic.parameters(), self.critic_target.parameters()
                ):
                    target_param.data.copy_(
                        tau * param.data + (1 - tau) * target_param.data
                    )

            av_loss += loss
        self.iter_count += 1
        # Write new values for tensorboard
        self.writer.add_scalar("train/loss", av_loss / iterations, self.iter_count)
        self.writer.add_scalar("train/avg_Q", av_Q / iterations, self.iter_count)
        self.writer.add_scalar("train/max_Q", max_Q, self.iter_count)
        if self.save_every > 0 and self.iter_count % self.save_every == 0:
            self.save(filename=self.model_name, directory=self.save_directory)

    def save(self, filename, directory):
        Path(directory).mkdir(parents=True, exist_ok=True)
        torch.save(self.actor.state_dict(), "%s/%s_actor.pth" % (directory, filename))
        torch.save(
            self.actor_target.state_dict(),
            "%s/%s_actor_target.pth" % (directory, filename),
        )
        torch.save(self.critic.state_dict(), "%s/%s_critic.pth" % (directory, filename))
        torch.save(
            self.critic_target.state_dict(),
            "%s/%s_critic_target.pth" % (directory, filename),
        )

    def load(self, filename, directory):
        self.actor.load_state_dict(
            torch.load("%s/%s_actor.pth" % (directory, filename), map_location=self.device)
        )
        self.actor_target.load_state_dict(
            torch.load("%s/%s_actor_target.pth" % (directory, filename), map_location=self.device)
        )
        self.critic.load_state_dict(
            torch.load("%s/%s_critic.pth" % (directory, filename), map_location=self.device)
        )
        self.critic_target.load_state_dict(
            torch.load("%s/%s_critic_target.pth" % (directory, filename), map_location=self.device)
        )
        print(f"Loaded weights from: {directory}")

    def prepare_state(self, latest_scan, distance, cos, sin, collision, goal, action):
        # Convert the data returned from the environment into the state representation used for learning
        latest_scan = np.array(latest_scan)

        inf_mask = np.isinf(latest_scan)
        latest_scan[inf_mask] = 7.0

        max_bins = self.state_dim - 5
        bin_size = int(np.ceil(len(latest_scan) / max_bins))

        # Initialize the list to store the minimum values of each bin
        min_values = []

        # Loop through the data and create bins
        for i in range(0, len(latest_scan), bin_size):
            # Get the current bin
            bin = latest_scan[i : i + min(bin_size, len(latest_scan) - i)]
            # Find the minimum value in the current bin and append it to the min_values list
            min_values.append(min(bin) / 7)

        # Normalize to the [0, 1] range
        distance /= 10
        lin_vel = action[0] * 2
        ang_vel = (action[1] + 1) / 2
        state = min_values + [distance, cos, sin] + [lin_vel, ang_vel]

        assert len(state) == self.state_dim
        terminal = 1 if collision or goal else 0

        return state, terminal
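The 25-dimensional state built by prepare_state decomposes as 20 binned laser readings plus 5 scalars (distance, cos, sin, and the two rescaled action components): max_bins = state_dim - 5 = 20, and with the 180 lidar beams configured in the world YAML each bin covers ceil(180 / 20) = 9 readings. A small standalone check of that arithmetic:

import numpy as np

state_dim = 25
n_beams = 180                                 # lidar "number" in the world YAML
max_bins = state_dim - 5                      # 20 laser bins
bin_size = int(np.ceil(n_beams / max_bins))   # 9 beams per bin

scan = np.full(n_beams, 7.0)                  # dummy scan at maximum range
min_values = [min(scan[i : i + bin_size]) / 7 for i in range(0, n_beams, bin_size)]
assert len(min_values) + 5 == state_dim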
models/TD3/checkpoint/TD3_actor.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6846697cc1c4f16f2de1b329267ce8b001bc42e7f8558fa76a304b693c632063
size 527836
models/TD3/checkpoint/TD3_actor_target.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ae3fc4e9ec3e4dc6b5a9c512ce5b6bdac56ab5a41ff526750e7cb6b58751eaf0
size 527906
models/TD3/checkpoint/TD3_critic.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b647c928e7a9ded7fe5f5715d7e5563d71ebbbc75be9e33d29e29b0c335f802e
size 1060450
models/TD3/checkpoint/TD3_critic_target.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4952b2658115973e4e141c6ea9f86f0cc551580db9fdd8657336daa0cc323d28
size 1060590
replay_buffer.py
ADDED
@@ -0,0 +1,142 @@
import random
from collections import deque
import itertools

import numpy as np


class ReplayBuffer(object):
    def __init__(self, buffer_size, random_seed=123):
        """
        The right side of the deque contains the most recent experiences
        """
        self.buffer_size = buffer_size
        self.count = 0
        self.buffer = deque()
        random.seed(random_seed)

    def add(self, s, a, r, t, s2):
        experience = (s, a, r, t, s2)
        if self.count < self.buffer_size:
            self.buffer.append(experience)
            self.count += 1
        else:
            self.buffer.popleft()
            self.buffer.append(experience)

    def size(self):
        return self.count

    def sample_batch(self, batch_size):
        if self.count < batch_size:
            batch = random.sample(self.buffer, self.count)
        else:
            batch = random.sample(self.buffer, batch_size)

        s_batch = np.array([_[0] for _ in batch])
        a_batch = np.array([_[1] for _ in batch])
        r_batch = np.array([_[2] for _ in batch]).reshape(-1, 1)
        t_batch = np.array([_[3] for _ in batch]).reshape(-1, 1)
        s2_batch = np.array([_[4] for _ in batch])

        return s_batch, a_batch, r_batch, t_batch, s2_batch

    def return_buffer(self):
        s = np.array([_[0] for _ in self.buffer])
        a = np.array([_[1] for _ in self.buffer])
        r = np.array([_[2] for _ in self.buffer]).reshape(-1, 1)
        t = np.array([_[3] for _ in self.buffer]).reshape(-1, 1)
        s2 = np.array([_[4] for _ in self.buffer])

        return s, a, r, t, s2

    def clear(self):
        self.buffer.clear()
        self.count = 0


class RolloutReplayBuffer(object):
    def __init__(self, buffer_size, random_seed=123, history_len=10):
        """
        The right side of the deque contains the most recent experiences
        """
        self.buffer_size = buffer_size
        self.count = 0
        self.buffer = deque(maxlen=buffer_size)
        random.seed(random_seed)
        self.buffer.append([])
        self.history_len = history_len

    def add(self, s, a, r, t, s2):
        experience = (s, a, r, t, s2)
        if t:
            self.count += 1
            self.buffer[-1].append(experience)
            self.buffer.append([])
        else:
            self.buffer[-1].append(experience)

    def size(self):
        return self.count

    def sample_batch(self, batch_size):
        # Sample whole episodes; the last (possibly unfinished) one is excluded
        if self.count < batch_size:
            batch = random.sample(
                list(itertools.islice(self.buffer, 0, len(self.buffer) - 1)), self.count
            )
        else:
            batch = random.sample(
                list(itertools.islice(self.buffer, 0, len(self.buffer) - 1)), batch_size
            )

        # Pick a random transition from each sampled episode
        idx = [random.randint(0, len(b) - 1) for b in batch]

        s_batch = []
        s2_batch = []
        for i in range(len(batch)):
            if idx[i] == len(batch[i]):
                s = batch[i]
                s2 = batch[i]
            else:
                s = batch[i][: idx[i] + 1]
                s2 = batch[i][: idx[i] + 1]
            s = [v[0] for v in s]
            s = s[::-1]

            s2 = [v[4] for v in s2]
            s2 = s2[::-1]

            # Pad short histories by repeating the oldest entry, else truncate to history_len
            if len(s) < self.history_len:
                missing = self.history_len - len(s)
                s += [s[-1]] * missing
                s2 += [s2[-1]] * missing
            else:
                s = s[: self.history_len]
                s2 = s2[: self.history_len]
            s = s[::-1]
            s_batch.append(s)
            s2 = s2[::-1]
            s2_batch.append(s2)

        a_batch = np.array([batch[i][idx[i]][1] for i in range(len(batch))])
        r_batch = np.array([batch[i][idx[i]][2] for i in range(len(batch))]).reshape(
            -1, 1
        )
        t_batch = np.array([batch[i][idx[i]][3] for i in range(len(batch))]).reshape(
            -1, 1
        )

        return np.array(s_batch), a_batch, r_batch, t_batch, np.array(s2_batch)

    def return_buffer(self):
        s = np.array([_[0] for _ in self.buffer])
        a = np.array([_[1] for _ in self.buffer])
        r = np.array([_[2] for _ in self.buffer]).reshape(-1, 1)
        t = np.array([_[3] for _ in self.buffer]).reshape(-1, 1)
        s2 = np.array([_[4] for _ in self.buffer])

        return s, a, r, t, s2

    def clear(self):
        self.buffer.clear()
        self.count = 0
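A minimal usage sketch of ReplayBuffer with the transition layout used throughout this repo (state, action, reward, terminal flag, next state); the dummy values are illustrative only:

import numpy as np
from replay_buffer import ReplayBuffer

buffer = ReplayBuffer(buffer_size=50000, random_seed=666)
s = np.zeros(25)              # state vector (see prepare_state in TD3.py)
a = np.array([0.1, -0.2])     # action
buffer.add(s, a, 0.5, 0, s)   # (s, a, r, t, s2)
s_b, a_b, r_b, t_b, s2_b = buffer.sample_batch(1)
assert r_b.shape == (1, 1)    # rewards and terminals come back as column vectors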
requirements.txt
ADDED
@@ -0,0 +1,5 @@
torch
tensorboard
numpy
gradio
ir-sim
robot_world.yaml
ADDED
@@ -0,0 +1,51 @@
world:
  height: 10  # the height of the world
  width: 10  # the width of the world
  step_time: 0.3  # time (s) per simulation step
  sample_time: 0.3  # time (s) between render and data extraction steps
  collision_mode: 'react'

robot:
  - kinematics: {name: 'diff'}
    shape: {name: 'circle', radius: 0.2}
    vel_min: [ 0, -1.0 ]
    vel_max: [ 1.0, 1.0 ]
    state: [2, 2, 0, 0]
    goal: [9, 9, 0]
    arrive_mode: position
    goal_threshold: 0.3

    sensors:
      - type: 'lidar2d'
        range_min: 0
        range_max: 7
        angle_range: 3.14
        number: 180
        noise: True
        std: 0.08
        angle_std: 0.1
        offset: [ 0, 0, 0 ]
        alpha: 0.3

    plot:
      show_trajectory: True

obstacle:
  - number: 5
    kinematics: {name: 'omni'}
    distribution: {name: 'random', range_low: [0, 0, -3.14], range_high: [10, 10, 3.14]}
    behavior: {name: 'rvo', wander: True, range_low: [0, 0, -3.14], range_high: [10, 10, 3.14], vxmax: 0.2, vymax: 0.2, factor: 1.0}
    vel_max: [0.2, 0.2]
    vel_min: [-0.2, -0.2]
    shape:
      - {name: 'circle', radius: 1.0, random_shape: True}
      - {name: 'polygon', random_shape: true, avg_radius_range: [0.5, 1.0], irregularity_range: [0, 0.4], spikeyness_range: [0, 0.4], num_vertices_range: [4, 6]}
  - shape: { name: 'rectangle', length: 1.0, width: 1.2 }  # length, width
    state: [ 8, 5, 1 ]
    kinematics: {name: 'static'}
  - shape: { name: 'rectangle', length: 0.5, width: 2.1 }  # length, width
    state: [ 1, 8, 1.3 ]
    kinematics: {name: 'static'}
  - shape: { name: 'linestring', vertices: [ [ 0, 0 ], [ 10, 0 ], [ 10, 10 ], [ 0, 10 ], [ 0, 0 ] ] }  # vertices
    kinematics: {name: 'static'}
    state: [ 0, 0, 0 ]
sim.py
ADDED
@@ -0,0 +1,95 @@
import irsim
import numpy as np
import random

import shapely
from irsim.lib.handler.geometry_handler import GeometryFactory
from irsim.world import ObjectBase


class SIM_ENV:
    def __init__(self, world_file="robot_world.yaml", save_ani=False):
        self.env = irsim.make(world_file, save_ani=save_ani)
        robot_info = self.env.get_robot_info(0)
        self.robot_goal = robot_info.goal

    def step(self, lin_velocity=0.0, ang_velocity=0.1):
        self.env.step(action_id=0, action=np.array([[lin_velocity], [ang_velocity]]))
        self.env.render()

        scan = self.env.get_lidar_scan()
        latest_scan = scan["ranges"]

        robot_state = self.env.get_robot_state()
        goal_vector = [
            self.robot_goal[0].item() - robot_state[0].item(),
            self.robot_goal[1].item() - robot_state[1].item(),
        ]
        distance = np.linalg.norm(goal_vector)
        goal = self.env.robot.arrive
        pose_vector = [np.cos(robot_state[2]).item(), np.sin(robot_state[2]).item()]
        cos, sin = self.cossin(pose_vector, goal_vector)
        collision = self.env.robot.collision
        action = [lin_velocity, ang_velocity]
        reward = self.get_reward(goal, collision, action, latest_scan)

        return latest_scan, distance, cos, sin, collision, goal, action, reward

    def reset(self, robot_state=None, robot_goal=None, random_obstacles=True):
        if robot_state is None:
            robot_state = [[random.uniform(1, 9)], [random.uniform(1, 9)], [0], [0]]

        self.env.robot.set_state(
            state=np.array(robot_state),
            init=True,
        )

        if random_obstacles:
            self.env.random_obstacle_position(
                range_low=[0, 0, -3.14],
                range_high=[10, 10, 3.14],
                ids=[i + 1 for i in range(7)],
                non_overlapping=True,
            )

        if robot_goal is None:
            # Resample the goal until it does not intersect any obstacle
            intersects = True
            while intersects:
                robot_goal = [[random.uniform(1, 9)], [random.uniform(1, 9)], [0]]
                shape = {"name": "circle", "radius": 0.4}
                state = [robot_goal[0], robot_goal[1], robot_goal[2]]
                gf = GeometryFactory.create_geometry(**shape)
                geometry = gf.step(np.c_[state])
                intersects = any(
                    [
                        shapely.intersects(geometry, obj._geometry)
                        for obj in self.env.obstacle_list
                    ]
                )
        self.env.robot.set_goal(np.array(robot_goal), init=True)
        self.env.reset()
        self.robot_goal = self.env.robot.goal

        action = [0.0, 0.0]
        latest_scan, distance, cos, sin, _, _, action, reward = self.step(
            lin_velocity=action[0], ang_velocity=action[1]
        )
        return latest_scan, distance, cos, sin, False, False, action, reward

    @staticmethod
    def cossin(vec1, vec2):
        vec1 = vec1 / np.linalg.norm(vec1)
        vec2 = vec2 / np.linalg.norm(vec2)
        cos = np.dot(vec1, vec2)
        sin = vec1[0] * vec2[1] - vec1[1] * vec2[0]

        return cos, sin

    @staticmethod
    def get_reward(goal, collision, action, laser_scan):
        if goal:
            return 100.0
        elif collision:
            return -100.0
        else:
            # Reward forward motion; penalize turning and obstacle proximity
            r3 = lambda x: 1.35 - x if x < 1.35 else 0.0
            return action[0] - abs(action[1]) / 2 - r3(min(laser_scan)) / 2
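To make the shaped reward in get_reward concrete: outside of terminal states it rewards forward velocity, penalizes turning, and penalizes obstacle proximity below 1.35 m. A worked example with hypothetical values:

# Hypothetical step: 0.4 m/s forward, 0.5 rad/s turn, nearest obstacle at 1.0 m.
lin, ang, min_scan = 0.4, 0.5, 1.0
r3 = (1.35 - min_scan) if min_scan < 1.35 else 0.0  # proximity penalty term
reward = lin - abs(ang) / 2 - r3 / 2                # 0.4 - 0.25 - 0.175 = -0.025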
train.py
ADDED
@@ -0,0 +1,142 @@
from models.TD3.TD3 import TD3

import torch
import numpy as np
from sim import SIM_ENV
from utils import get_buffer


def main(args=None):
    """Main training function"""
    action_dim = 2  # number of actions produced by the model
    max_action = 1  # maximum absolute value of output actions
    state_dim = 25  # number of input values in the neural network (vector length of state input)
    device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu"
    )  # use CUDA if it is available, CPU otherwise
    nr_eval_episodes = 10  # how many episodes to use to run evaluation
    max_epochs = 60  # max number of epochs
    epoch = 0  # starting epoch number
    episodes_per_epoch = 70  # how many episodes to run in a single epoch
    episode = 0  # starting episode number
    train_every_n = 2  # train and update network parameters every n episodes
    training_iterations = 80  # how many batches to use for a single training cycle
    batch_size = 64  # batch size for each training iteration
    max_steps = 300  # maximum number of steps in a single episode
    steps = 0  # starting step number
    load_saved_buffer = False  # whether to load experiences from assets/data.yml
    pretrain = False  # whether to use the loaded experiences to pre-train the model (load_saved_buffer must be True)
    pretraining_iterations = (
        10  # number of training iterations to run during pre-training
    )
    save_every = 10  # save the model every n training cycles

    model = TD3(
        state_dim=state_dim,
        action_dim=action_dim,
        max_action=max_action,
        device=device,
        save_every=save_every,
        load_model=False,
    )  # instantiate a model

    sim = SIM_ENV()  # instantiate environment
    replay_buffer = get_buffer(
        model,
        sim,
        load_saved_buffer,
        pretrain,
        pretraining_iterations,
        training_iterations,
        batch_size,
    )

    latest_scan, distance, cos, sin, collision, goal, a, reward = sim.step(
        lin_velocity=0.0, ang_velocity=0.0
    )  # get the initial step state

    while epoch < max_epochs:  # train until max_epochs is reached
        state, terminal = model.prepare_state(
            latest_scan, distance, cos, sin, collision, goal, a
        )  # get a state representation from the data returned by the environment

        action = model.get_action(np.array(state), True)  # get an action from the model
        a_in = [
            (action[0] + 1) / 4,
            action[1],
        ]  # map linear velocity to the [0, 0.5] m/s range

        latest_scan, distance, cos, sin, collision, goal, a, reward = sim.step(
            lin_velocity=a_in[0], ang_velocity=a_in[1]
        )  # get data from the environment
        next_state, terminal = model.prepare_state(
            latest_scan, distance, cos, sin, collision, goal, a
        )  # get the next state representation
        replay_buffer.add(
            state, action, reward, terminal, next_state
        )  # add experience to the replay buffer

        if (
            terminal or steps == max_steps
        ):  # reset the environment if a terminal state was reached or max_steps were taken
            latest_scan, distance, cos, sin, collision, goal, a, reward = sim.reset()
            episode += 1
            if episode % train_every_n == 0:
                model.train(
                    replay_buffer=replay_buffer,
                    iterations=training_iterations,
                    batch_size=batch_size,
                )  # train the model and update its parameters

            steps = 0
        else:
            steps += 1

        if (
            episode + 1
        ) % episodes_per_epoch == 0:  # if the epoch is concluded, run evaluation
            episode = 0
            epoch += 1
            evaluate(model, epoch, sim, eval_episodes=nr_eval_episodes)


def evaluate(model, epoch, sim, eval_episodes=10):
    print("..............................................")
    print(f"Epoch {epoch}. Evaluating scenarios")
    avg_reward = 0.0
    col = 0
    goals = 0
    for _ in range(eval_episodes):
        count = 0
        latest_scan, distance, cos, sin, collision, goal, a, reward = sim.reset()
        done = False
        while not done and count < 501:
            state, terminal = model.prepare_state(
                latest_scan, distance, cos, sin, collision, goal, a
            )
            action = model.get_action(np.array(state), False)
            a_in = [(action[0] + 1) / 4, action[1]]
            latest_scan, distance, cos, sin, collision, goal, a, reward = sim.step(
                lin_velocity=a_in[0], ang_velocity=a_in[1]
            )
            avg_reward += reward
            count += 1
            if collision:
                col += 1
            if goal:
                goals += 1
            done = collision or goal
    avg_reward /= eval_episodes
    avg_col = col / eval_episodes
    avg_goal = goals / eval_episodes
    print(f"Average Reward: {avg_reward}")
    print(f"Average Collision rate: {avg_col}")
    print(f"Average Goal rate: {avg_goal}")
    print("..............................................")
    model.writer.add_scalar("eval/avg_reward", avg_reward, epoch)
    model.writer.add_scalar("eval/avg_col", avg_col, epoch)
    model.writer.add_scalar("eval/avg_goal", avg_goal, epoch)


if __name__ == "__main__":
    main()
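Training is launched by running train.py directly; progress is logged through the SummaryWriter created in TD3.__init__, which (assuming PyTorch's default) writes to a runs/ directory that TensorBoard can watch. A minimal sketch:

# Minimal sketch: run training and monitor the logged scalars.
# Assumes the default SummaryWriter() log directory ("runs/").
from train import main

if __name__ == "__main__":
    main()  # logs train/* and eval/* scalars via model.writer
# afterwards, inspect with: tensorboard --logdir runs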
utils.py
ADDED
@@ -0,0 +1,123 @@
from typing import List
from tqdm import tqdm
import yaml

from replay_buffer import ReplayBuffer, RolloutReplayBuffer


class Pretraining:
    def __init__(
        self,
        file_names: List[str],
        model: object,
        replay_buffer: object,
        reward_function,
    ):
        self.file_names = file_names
        self.model = model
        self.replay_buffer = replay_buffer
        self.reward_function = reward_function

    def load_buffer(self):
        for file_name in self.file_names:
            print("Loading file: ", file_name)
            with open(file_name, "r") as file:
                samples = yaml.full_load(file)
                for i in tqdm(range(1, len(samples) - 1)):
                    sample = samples[i]
                    latest_scan = sample["latest_scan"]
                    distance = sample["distance"]
                    cos = sample["cos"]
                    sin = sample["sin"]
                    collision = sample["collision"]
                    goal = sample["goal"]
                    action = sample["action"]

                    state, terminal = self.model.prepare_state(
                        latest_scan, distance, cos, sin, collision, goal, action
                    )

                    if terminal:
                        continue

                    next_sample = samples[i + 1]
                    next_latest_scan = next_sample["latest_scan"]
                    next_distance = next_sample["distance"]
                    next_cos = next_sample["cos"]
                    next_sin = next_sample["sin"]
                    next_collision = next_sample["collision"]
                    next_goal = next_sample["goal"]
                    next_action = next_sample["action"]
                    next_state, next_terminal = self.model.prepare_state(
                        next_latest_scan,
                        next_distance,
                        next_cos,
                        next_sin,
                        next_collision,
                        next_goal,
                        next_action,
                    )
                    reward = self.reward_function(
                        next_goal, next_collision, action, next_latest_scan
                    )
                    self.replay_buffer.add(
                        state, action, reward, next_terminal, next_state
                    )

        return self.replay_buffer

    def train(
        self,
        pretraining_iterations,
        replay_buffer,
        iterations,
        batch_size,
    ):
        print("Running Pretraining")
        for _ in tqdm(range(pretraining_iterations)):
            self.model.train(
                replay_buffer=replay_buffer,
                iterations=iterations,
                batch_size=batch_size,
            )
        print("Model Pretrained")


def get_buffer(
    model,
    sim,
    load_saved_buffer,
    pretrain,
    pretraining_iterations,
    training_iterations,
    batch_size,
    buffer_size=50000,
    random_seed=666,
    file_names=["assets/data.yml"],
    history_len=10,
):
    replay_buffer = ReplayBuffer(buffer_size=buffer_size, random_seed=random_seed)

    if pretrain:
        assert (
            load_saved_buffer
        ), "To pre-train the model, load_saved_buffer must be set to True"

    if load_saved_buffer:
        pretraining = Pretraining(
            file_names=file_names,
            model=model,
            replay_buffer=replay_buffer,
            reward_function=sim.get_reward,
        )  # instantiate pre-training
        replay_buffer = (
            pretraining.load_buffer()
        )  # fill the buffer with experiences from the data.yml file
        if pretrain:
            pretraining.train(
                pretraining_iterations=pretraining_iterations,
                replay_buffer=replay_buffer,
                iterations=training_iterations,
                batch_size=batch_size,
            )  # run pre-training

    return replay_buffer