537 lines
23 KiB
Python
Executable File
537 lines
23 KiB
Python
Executable File
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from matplotlib.animation import FuncAnimation
|
|
import gymnasium as gym
|
|
from gymnasium import spaces
|
|
from typing import Any, Dict, Optional, Tuple
|
|
|
|
|
|
class WheeledRobotEnv(gym.Env):
|
|
"""
|
|
Base gymnasium environment for wheeled robot simulation.
|
|
|
|
This environment provides a foundation for simulating various types of wheeled robots
|
|
in a 2D plane. The robot state consists of (x, y, theta) representing position and
|
|
orientation. The environment supports different rendering modes and maintains a
|
|
history of robot positions for visualization.
|
|
|
|
Attributes:
|
|
dt (float): Time step for simulation (default: 0.05)
|
|
max_steps (int): Maximum number of simulation steps (default: 500)
|
|
state (np.ndarray): Current robot state [x, y, theta]
|
|
history (list): List of historical states for trajectory visualization
|
|
observation_space (spaces.Box): Valid observation space bounds
|
|
action_space (spaces.Space): Valid action space (defined by subclasses)
|
|
render_mode (str): Rendering mode ("human", "rgb_array", or None)
|
|
|
|
The coordinate system uses:
|
|
- x: horizontal position
|
|
- y: vertical position
|
|
- theta: orientation angle in radians
|
|
"""
|
|
metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 20}
|
|
|
|
def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05, speed_limit_enabled: bool = False):
|
|
super().__init__()
|
|
|
|
self.dt = 0.05
|
|
self.max_steps = 400
|
|
self.current_step = 0
|
|
|
|
self.state = np.array([0.0, 0.0, 0.0], dtype=np.float32)
|
|
self.state_derivative = np.array([0.0, 0.0, 0.0], dtype=np.float32)
|
|
self.history = []
|
|
self.time_history = []
|
|
|
|
# Target position configuration
|
|
self.random_target = random_target
|
|
self.fixed_target = target_position if target_position is not None else np.array([10.0, 5.0, np.pi], dtype=np.float32)
|
|
self.target_position = np.array([0.0, 0.0, 0.0], dtype=np.float32)
|
|
|
|
# Target reaching parameters
|
|
self.position_tolerance = 0.2 # meters
|
|
self.orientation_tolerance = 0.1 # radians
|
|
self.velocity_tolerance = velocity_tolerance # units/step for "stopped" detection
|
|
self.target_reached_reward = 100.0
|
|
|
|
# Speed limiting parameters
|
|
self.speed_limit_enabled = speed_limit_enabled
|
|
self._wheel_speed_limits = None # To be set by subclasses
|
|
|
|
# Expanded observation space: [x, y, theta, dx, dy, dtheta, time, target_x, target_y, target_theta]
|
|
self.observation_space = spaces.Box(
|
|
low=np.array([-15.0, -10.0, -np.pi, -15.0, -10.0, -10.0, 0.0, -15.0, -10.0, -np.pi], dtype=np.float32),
|
|
high=np.array([15.0, 10.0, np.pi, 15.0, 10.0, 10.0, 1000.0, 15.0, 10.0, np.pi], dtype=np.float32),
|
|
dtype=np.float32
|
|
)
|
|
|
|
self.action_space = self._define_action_space()
|
|
|
|
self.render_mode = render_mode
|
|
self.h_fig = None
|
|
self.h_ax = None
|
|
|
|
# Default render bounds
|
|
self.render_xlim = (-5, 5)
|
|
self.render_ylim = (-2, 8)
|
|
|
|
self.F = self._F()
|
|
|
|
def _apply_wheel_speed_limits(self, wheel_speeds: np.ndarray) -> np.ndarray:
|
|
"""Apply wheel speed limits if enabled"""
|
|
if not self.speed_limit_enabled or self._wheel_speed_limits is None:
|
|
return wheel_speeds
|
|
|
|
# Apply symmetric limits to each wheel
|
|
limited_speeds = np.clip(wheel_speeds.flatten(),
|
|
-self._wheel_speed_limits,
|
|
self._wheel_speed_limits)
|
|
return limited_speeds.reshape(-1, 1)
|
|
|
|
def _define_action_space(self):
|
|
return spaces.Box(low=-5.0, high=5.0, shape=(3,), dtype=np.float32)
|
|
|
|
@staticmethod
|
|
def _F():
|
|
return np.eye(3)
|
|
|
|
@staticmethod
|
|
def _R(theta):
|
|
return np.array([[np.cos(theta), -np.sin(theta), 0.0],
|
|
[np.sin(theta), np.cos(theta), 0.0],
|
|
[0, 0, 1]])
|
|
|
|
def _generate_target_position(self) -> np.ndarray:
|
|
"""Generate a new target position"""
|
|
if self.random_target:
|
|
# Generate random target within bounds, avoiding robot start position
|
|
x = np.random.uniform(-10.0, 10.0)
|
|
y = np.random.uniform(-5.0, 8.0)
|
|
theta = np.random.uniform(0, 2*np.pi)
|
|
|
|
# Ensure target is not too close to start position (0,0,0)
|
|
while np.sqrt(x**2 + y**2) < 2.0:
|
|
x = np.random.uniform(-10.0, 10.0)
|
|
y = np.random.uniform(-5.0, 8.0)
|
|
|
|
return np.array([x, y, theta], dtype=np.float32)
|
|
else:
|
|
return self.fixed_target.copy()
|
|
|
|
def _is_target_reached(self) -> bool:
|
|
"""Check if robot has reached the target within tolerance AND is stopped"""
|
|
pos_error = np.abs(self.state[:2] - self.target_position[:2])
|
|
angle_error = np.abs(np.mod(self.state[2] - self.target_position[2] + np.pi, 2*np.pi) - np.pi)
|
|
robot_velocity = np.linalg.norm(self.state_derivative)
|
|
|
|
return bool((pos_error[0] < self.position_tolerance) and
|
|
(pos_error[1] < self.position_tolerance) and
|
|
(angle_error < self.orientation_tolerance) and
|
|
(robot_velocity < self.velocity_tolerance))
|
|
|
|
def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]:
|
|
if seed is not None:
|
|
np.random.seed(seed)
|
|
|
|
self.state = np.array([0.0, 0.0, 0.0], dtype=np.float32)
|
|
self.state_derivative = np.array([0.0, 0.0, 0.0], dtype=np.float32)
|
|
self.current_step = 0
|
|
self.history = [self.state.copy()]
|
|
self.time_history = [0.0]
|
|
|
|
# Generate new target position
|
|
self.target_position = self._generate_target_position()
|
|
|
|
# Include target in observation: [state, state_derivative, time, target]
|
|
observation = np.concatenate([self.state, self.state_derivative, [self.current_step * self.dt], self.target_position])
|
|
return observation.astype(np.float32), {}
|
|
|
|
def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict[str, Any]]:
|
|
# if isinstance(self.action_space, spaces.Box):
|
|
# action = np.clip(action, self.action_space.low, self.action_space.high)
|
|
|
|
U = action.reshape(-1, 1)
|
|
X = self.state.reshape(-1, 1)
|
|
|
|
# Apply wheel speed limits before kinematic conversion
|
|
U = self._apply_wheel_speed_limits(U)
|
|
|
|
# U represents wheel velocities from controllers
|
|
# F converts wheel velocities to robot velocities
|
|
robot_velocities = self.F @ U
|
|
Xp = X + self._R(X[2, 0]) @ robot_velocities * self.dt
|
|
|
|
previous_state = self.state.copy()
|
|
self.state = Xp.flatten().astype(np.float32)
|
|
self.state[2] = np.mod(self.state[2], 2*np.pi) # Wrap theta to [0, 2π]
|
|
self.state_derivative = ((self.state - previous_state) / self.dt).astype(np.float32)
|
|
self.current_step += 1
|
|
self.history.append(self.state.copy())
|
|
self.time_history.append(self.current_step * self.dt)
|
|
|
|
reward = self._compute_reward(action)
|
|
terminated = self._is_terminated()
|
|
truncated = self.current_step >= self.max_steps
|
|
|
|
observation = np.concatenate([self.state, self.state_derivative, [self.current_step * self.dt], self.target_position])
|
|
return observation.astype(np.float32), reward, terminated, truncated, {}
|
|
|
|
def _compute_reward(self, action: np.ndarray) -> float:
|
|
# Base action penalty
|
|
base_reward = -0.01 * np.sum(action**2)
|
|
|
|
# Check if target is reached
|
|
position_error = np.linalg.norm(self.state[:2] - self.target_position[:2])
|
|
orientation_error = abs(self.state[2] - self.target_position[2])
|
|
|
|
# Handle angle wrapping for orientation error
|
|
orientation_error = min(orientation_error, 2*np.pi - orientation_error)
|
|
|
|
# Check if robot is both at target AND stopped
|
|
robot_velocity = np.linalg.norm(self.state_derivative)
|
|
|
|
# Target reached bonus (requires position, orientation AND velocity criteria)
|
|
if (position_error < self.position_tolerance and
|
|
orientation_error < self.orientation_tolerance and
|
|
robot_velocity < self.velocity_tolerance):
|
|
return base_reward + self.target_reached_reward
|
|
|
|
return base_reward
|
|
|
|
def _is_terminated(self) -> bool:
|
|
return self._is_target_reached()
|
|
|
|
def set_render_bounds(self, xlim: Tuple[float, float], ylim: Tuple[float, float]) -> None:
|
|
"""
|
|
Set the coordinate bounds for rendering.
|
|
|
|
Args:
|
|
xlim: Tuple of (x_min, x_max) for x-axis limits
|
|
ylim: Tuple of (y_min, y_max) for y-axis limits
|
|
"""
|
|
self.render_xlim = xlim
|
|
self.render_ylim = ylim
|
|
|
|
def render(self):
|
|
if self.render_mode == "human":
|
|
self._render_human()
|
|
elif self.render_mode == "rgb_array":
|
|
return self._render_rgb_array()
|
|
|
|
def _render_human(self):
|
|
if self.h_fig is None:
|
|
self._create_figure()
|
|
|
|
if len(self.history) > 1 and self.h_ax is not None:
|
|
history_array = np.array(self.history)
|
|
time_array = np.array(self.time_history)
|
|
current_time = self.current_step * self.dt
|
|
|
|
# Clear all subplots
|
|
for ax in self.h_ax:
|
|
ax.clear()
|
|
|
|
# Main trajectory plot (left panel)
|
|
self.h_ax[0].plot(history_array[:, 0], history_array[:, 1], 'b-', alpha=0.7, label='Trajectory')
|
|
self.h_ax[0].plot(self.state[0], self.state[1], 'ro', markersize=8, label='Current Position')
|
|
|
|
# Draw target position
|
|
self.h_ax[0].plot(self.target_position[0], self.target_position[1], 'gs', markersize=10, label='Target Position')
|
|
target_tip_points = self._get_robot_axis_points(self.target_position)
|
|
self.h_ax[0].plot(target_tip_points[0, :2], target_tip_points[1, :2], 'g--', linewidth=1, alpha=0.7)
|
|
self.h_ax[0].plot(target_tip_points[0, [0, 2]], target_tip_points[1, [0, 2]], 'g--', linewidth=1, alpha=0.7)
|
|
|
|
tip_points = self._get_robot_axis_points(self.state)
|
|
self.h_ax[0].plot(tip_points[0, :2], tip_points[1, :2], 'r-', linewidth=2, label='X-axis')
|
|
self.h_ax[0].plot(tip_points[0, [0, 2]], tip_points[1, [0, 2]], 'g-', linewidth=2, label='Y-axis')
|
|
|
|
self.h_ax[0].set_xlim(*self.render_xlim)
|
|
self.h_ax[0].set_ylim(*self.render_ylim)
|
|
self.h_ax[0].set_aspect('equal', adjustable='box')
|
|
self.h_ax[0].grid(True)
|
|
self.h_ax[0].set_xlabel('x [m]')
|
|
self.h_ax[0].set_ylabel('y [m]')
|
|
self.h_ax[0].set_title(f'Robot Position (Time: {current_time:04.2f}s -- Step: {self.current_step:03d})')
|
|
|
|
# Time-series plots (right panels)
|
|
labels = ['x [m]', 'y [m]', 'θ [rad]']
|
|
for i, (ax, label) in enumerate(zip(self.h_ax[1:], labels)):
|
|
ax.plot(time_array, history_array[:, i], 'k-', linewidth=1)
|
|
ax.plot(current_time, self.state[i], 'ro', markersize=6)
|
|
ax.set_xlabel('Time [s]')
|
|
ax.set_ylabel(label)
|
|
ax.grid(True)
|
|
ax.set_xlim(0, max(current_time + 1, 5))
|
|
|
|
# Fixed y-axis ranges for time-series plots
|
|
if i == 0: # x position
|
|
ax.set_ylim(*self.render_xlim) # Use same range as main plot
|
|
elif i == 1: # y position
|
|
ax.set_ylim(*self.render_ylim) # Use same range as main plot
|
|
elif i == 2: # theta angle
|
|
ax.set_ylim(0, 2*np.pi) # Fixed range for full rotation
|
|
|
|
plt.tight_layout()
|
|
plt.pause(self.dt)
|
|
|
|
def _render_rgb_array(self):
|
|
return None
|
|
|
|
def _create_figure(self):
|
|
self.h_fig = plt.figure(figsize=(10, 5.5))
|
|
|
|
# Create multi-panel layout matching original
|
|
self.h_ax = [
|
|
self.h_fig.add_subplot(1, 2, 1), # Main trajectory plot
|
|
self.h_fig.add_subplot(3, 2, 2), # x vs time
|
|
self.h_fig.add_subplot(3, 2, 4), # y vs time
|
|
self.h_fig.add_subplot(3, 2, 6) # theta vs time
|
|
]
|
|
|
|
# Configure main trajectory plot
|
|
self.h_ax[0].set_xlabel('x [m]')
|
|
self.h_ax[0].set_ylabel('y [m]')
|
|
self.h_ax[0].set_title('Wheeled Robot')
|
|
self.h_ax[0].axis('equal')
|
|
self.h_ax[0].grid(True)
|
|
self.h_ax[0].set_xlim(*self.render_xlim)
|
|
self.h_ax[0].set_ylim(*self.render_ylim)
|
|
|
|
# Configure time-series plots
|
|
labels = ['x [m]', 'y [m]', 'θ [rad]']
|
|
for i, (ax, label) in enumerate(zip(self.h_ax[1:], labels)):
|
|
ax.set_xlabel('Time [s]')
|
|
ax.set_ylabel(label)
|
|
ax.grid(True)
|
|
|
|
plt.tight_layout()
|
|
plt.ion()
|
|
|
|
def _get_robot_axis_points(self, state):
|
|
tip_points = np.zeros((2, 3), dtype=float)
|
|
tip_points[:, 0] = state[:2]
|
|
Rxy = self._R(state[2])[:2, :2]
|
|
tip_points[:, [1]] = Rxy @ np.array([[0.7], [0]]) + tip_points[:, [0]]
|
|
tip_points[:, [2]] = Rxy @ np.array([[0], [0.7]]) + tip_points[:, [0]]
|
|
return tip_points
|
|
|
|
def close(self):
|
|
if self.h_fig is not None:
|
|
plt.close(self.h_fig)
|
|
self.h_fig = None
|
|
self.h_ax = None
|
|
|
|
class OmniRobotEnv(WheeledRobotEnv):
|
|
"""
|
|
Omnidirectional robot environment using three-wheel omnidirectional drive.
|
|
|
|
This environment simulates a robot with three omnidirectional wheels arranged
|
|
at 120-degree intervals. The robot can move in any direction and rotate
|
|
simultaneously without changing orientation first.
|
|
|
|
The action space consists of 3 wheel velocities, which are converted to
|
|
robot motion through the kinematic transformation matrix F.
|
|
|
|
Wheel configuration:
|
|
- Wheel 1: 0° (front)
|
|
- Wheel 2: 120° (rear left)
|
|
- Wheel 3: 240° (rear right)
|
|
- All wheels at distance l=0.5 from center
|
|
- Wheel radius r=0.1
|
|
|
|
Action space: Box(3,) representing wheel velocities [-100.0, 100.0]
|
|
"""
|
|
|
|
def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05, speed_limit_enabled: bool = False):
|
|
super().__init__(render_mode, random_target, target_position, velocity_tolerance, speed_limit_enabled)
|
|
self._wheel_speed_limits = np.array([70.0, 70.0, 70.0])
|
|
|
|
def _define_action_space(self):
|
|
return spaces.Box(low=-100.0, high=100.0, shape=(3,), dtype=np.float32)
|
|
|
|
@staticmethod
|
|
def _F():
|
|
a1 = 0.0; b1 = 0.0; l1 = 0.5
|
|
a2 = (2/3)*np.pi; b2 = 0.0; l2 = 0.5
|
|
a3 = (4/3)*np.pi; b3 = 0.0; l3 = 0.5
|
|
r = 0.1
|
|
|
|
J = np.array([
|
|
[1/r*np.sin(a1+b1), -1/r*np.cos(a1+b1), -l1/r*np.cos(b1)],
|
|
[1/r*np.sin(a2+b2), -1/r*np.cos(a2+b2), -l2/r*np.cos(b2)],
|
|
[1/r*np.sin(a3+b3), -1/r*np.cos(a3+b3), -l3/r*np.cos(b3)]
|
|
])
|
|
F = np.linalg.inv(J)
|
|
|
|
return F
|
|
|
|
|
|
class MecanumRobotEnv(WheeledRobotEnv):
|
|
"""
|
|
Mecanum wheel robot environment using four-wheel mecanum drive.
|
|
|
|
This environment simulates a robot with four mecanum wheels arranged in a
|
|
rectangular configuration. Each wheel has rollers at 45° angles, allowing
|
|
the robot to move in any direction and rotate simultaneously.
|
|
|
|
The action space consists of 4 wheel velocities for the mecanum wheels,
|
|
which are converted to robot motion through the kinematic transformation
|
|
matrix F using the Moore-Penrose pseudoinverse.
|
|
|
|
Wheel configuration:
|
|
- Front-left wheel: position (+0.5, +0.5), roller direction (+1, +1)/√2
|
|
- Front-right wheel: position (-0.5, +0.5), roller direction (-1, +1)/√2
|
|
- Rear-left wheel: position (-0.5, -0.5), roller direction (-1, -1)/√2
|
|
- Rear-right wheel: position (+0.5, -0.5), roller direction (+1, -1)/√2
|
|
- All wheels have radius r=0.1
|
|
|
|
Action space: Box(4,) representing wheel velocities [-160.0, 160.0]
|
|
"""
|
|
|
|
def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05, speed_limit_enabled: bool = False):
|
|
super().__init__(render_mode, random_target, target_position, velocity_tolerance, speed_limit_enabled)
|
|
self._wheel_speed_limits = np.array([40.0, 40.0, 40.0, 40.0])
|
|
|
|
def _define_action_space(self):
|
|
return spaces.Box(low=-160.0, high=160.0, shape=(4,), dtype=np.float32)
|
|
|
|
@staticmethod
|
|
def _F():
|
|
l1 = np.array([+0.5, +0.5]); f1 = np.array([+1, +1])/np.sqrt(2); d1 = np.array([+1, 0])
|
|
l2 = np.array([-0.5, +0.5]); f2 = np.array([-1, +1])/np.sqrt(2); d2 = np.array([+1, 0])
|
|
l3 = np.array([-0.5, -0.5]); f3 = np.array([-1, -1])/np.sqrt(2); d3 = np.array([-1, 0])
|
|
l4 = np.array([+0.5, -0.5]); f4 = np.array([+1, -1])/np.sqrt(2); d4 = np.array([-1, 0])
|
|
r = 0.1
|
|
|
|
J11 = f1[1]/(r*d1[0]*f1[1]-r*d1[1]*f1[0])
|
|
J21 = f2[1]/(r*d2[0]*f2[1]-r*d2[1]*f2[0])
|
|
J31 = f3[1]/(r*d3[0]*f3[1]-r*d3[1]*f3[0])
|
|
J41 = f4[1]/(r*d4[0]*f4[1]-r*d4[1]*f4[0])
|
|
J12 = -f1[0]/(r*d1[0]*f1[1]-r*d1[1]*f1[0])
|
|
J22 = -f2[0]/(r*d2[0]*f2[1]-r*d2[1]*f2[0])
|
|
J32 = -f3[0]/(r*d3[0]*f3[1]-r*d3[1]*f3[0])
|
|
J42 = -f4[0]/(r*d4[0]*f4[1]-r*d4[1]*f4[0])
|
|
J13 = (-f1[1]*l1[1]-f1[0]*l1[0])/(r*d1[0]*f1[1]-r*d1[1]*f1[0])
|
|
J23 = (-f2[1]*l2[1]-f2[0]*l2[0])/(r*d2[0]*f2[1]-r*d2[1]*f2[0])
|
|
J33 = (-f3[1]*l3[1]-f3[0]*l3[0])/(r*d3[0]*f3[1]-r*d3[1]*f3[0])
|
|
J43 = (-f4[1]*l4[1]-f4[0]*l4[0])/(r*d4[0]*f4[1]-r*d4[1]*f4[0])
|
|
J = np.array([[J11, J12, J13], [J21, J22, J23], [J31, J32, J33], [J41, J42, J43]])
|
|
|
|
F = np.linalg.pinv(J)
|
|
return F
|
|
|
|
|
|
class TankRobotEnv(WheeledRobotEnv):
|
|
"""
|
|
Tank-drive (differential drive) robot environment using two wheels.
|
|
|
|
This environment simulates a robot with two independently controlled wheels
|
|
on opposite sides. The robot moves forward/backward when both wheels rotate
|
|
in the same direction, and turns when wheels rotate at different speeds.
|
|
|
|
The robot cannot move sideways and must rotate to change direction.
|
|
The action space consists of 2 wheel velocities, which are converted to
|
|
robot motion through the kinematic transformation matrix F using the
|
|
Moore-Penrose pseudoinverse.
|
|
|
|
Wheel configuration:
|
|
- Left wheel: contributes to forward motion and rotation
|
|
- Right wheel: contributes to forward motion and rotation
|
|
- Wheel base b=1.0 (distance between wheels)
|
|
- Wheel radius r=0.1
|
|
|
|
Action space: Box(2,) representing left and right wheel velocities [-100.0, 100.0]
|
|
"""
|
|
|
|
def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05, speed_limit_enabled: bool = False):
|
|
super().__init__(render_mode, random_target, target_position, velocity_tolerance, speed_limit_enabled)
|
|
self._wheel_speed_limits = np.array([50.0, 50.0])
|
|
|
|
def _define_action_space(self):
|
|
return spaces.Box(low=-100.0, high=100.0, shape=(2,), dtype=np.float32)
|
|
|
|
@staticmethod
|
|
def _F():
|
|
b = 1
|
|
r = 0.1
|
|
|
|
J = np.array([
|
|
[+1/r, 0, -b/(2*r)],
|
|
[-1/r, 0, -b/(2*r)]
|
|
])
|
|
F = np.linalg.pinv(J)
|
|
|
|
return F
|
|
|
|
|
|
class AckermanRobotEnv(WheeledRobotEnv):
|
|
"""
|
|
Ackermann steering robot environment using bicycle kinematics.
|
|
|
|
This environment simulates a car-like robot with Ackermann steering geometry.
|
|
The robot has a front steering wheel and rear driving wheel, similar to a
|
|
bicycle model. It moves forward/backward and steers by changing the front
|
|
wheel angle.
|
|
|
|
Unlike other robot types, this class overrides the step() method to implement
|
|
bicycle kinematics instead of using the standard F matrix approach.
|
|
|
|
Vehicle parameters:
|
|
- Wheel radius r=0.1
|
|
- Uses bicycle kinematics model
|
|
- Steering angle affects turning radius
|
|
|
|
Action space: Box(2,) with:
|
|
- action[0]: driving velocity [-5.0, 5.0]
|
|
- action[1]: steering angle [-1.0, 1.0] radians
|
|
|
|
The robot cannot move sideways and follows curved paths based on the
|
|
steering angle and forward velocity.
|
|
"""
|
|
|
|
def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05, speed_limit_enabled: bool = False):
|
|
super().__init__(render_mode, random_target, target_position, velocity_tolerance, speed_limit_enabled)
|
|
# Set hardcoded limits with 10% margin above max observed (velocity: 1.3, steering: 0.6)
|
|
self._wheel_speed_limits = np.array([4, 2])
|
|
|
|
def _define_action_space(self):
|
|
return spaces.Box(low=np.array([-10.0, -1.0]), high=np.array([10.0, 1.0]), dtype=np.float32)
|
|
|
|
def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict[str, Any]]:
|
|
# if isinstance(self.action_space, spaces.Box):
|
|
# action = np.clip(action, self.action_space.low, self.action_space.high)
|
|
|
|
# Apply speed limits for Ackerman robot (velocity, steering angle)
|
|
action = self._apply_wheel_speed_limits(action.reshape(-1, 1)).flatten()
|
|
|
|
r = 0.1
|
|
dU = action.reshape(-1, 1)
|
|
X = self.state.reshape(-1, 1)
|
|
|
|
if dU[1, 0] != 0:
|
|
dX = np.array([[r*dU[0,0]], [0], [r*dU[0,0]/dU[1,0]]])
|
|
else:
|
|
dX = np.array([[r*dU[0,0]], [0], [0]])
|
|
|
|
Xp = X + self._R(X[2, 0]) @ dX * self.dt
|
|
|
|
previous_state = self.state.copy()
|
|
self.state = Xp.flatten().astype(np.float32)
|
|
self.state[2] = np.mod(self.state[2], 2*np.pi) # Wrap theta to [0, 2π]
|
|
self.state_derivative = ((self.state - previous_state) / self.dt).astype(np.float32)
|
|
self.current_step += 1
|
|
self.history.append(self.state.copy())
|
|
self.time_history.append(self.current_step * self.dt)
|
|
|
|
reward = self._compute_reward(action)
|
|
terminated = self._is_terminated()
|
|
truncated = self.current_step >= self.max_steps
|
|
|
|
observation = np.concatenate([self.state, self.state_derivative, [self.current_step * self.dt], self.target_position])
|
|
return observation.astype(np.float32), reward, terminated, truncated, {}
|
|
|
|
|
|
|
|
|
|
|