Added the WheeledRobot env
parent 55bb2734d0
commit 00e5a5e84e
510
WheeledRobot.py
Executable file
@@ -0,0 +1,510 @@
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import gymnasium as gym
from gymnasium import spaces
from typing import Any, Dict, Optional, Tuple


class WheeledRobotEnv(gym.Env):
    """
    Base gymnasium environment for wheeled robot simulation.

    This environment provides a foundation for simulating various types of wheeled robots
    in a 2D plane. The robot state consists of (x, y, theta) representing position and
    orientation. The environment supports different rendering modes and maintains a
    history of robot positions for visualization.

    Attributes:
        dt (float): Time step for simulation (default: 0.05)
        max_steps (int): Maximum number of simulation steps (default: 400)
        state (np.ndarray): Current robot state [x, y, theta]
        history (list): List of historical states for trajectory visualization
        observation_space (spaces.Box): Valid observation space bounds
        action_space (spaces.Space): Valid action space (defined by subclasses)
        render_mode (str): Rendering mode ("human", "rgb_array", or None)

    The coordinate system uses:
        - x: horizontal position
        - y: vertical position
        - theta: orientation angle in radians
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 20}

    def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05):
        super().__init__()

        self.dt = 0.05
        self.max_steps = 400
        self.current_step = 0

        self.state = np.array([0.0, 0.0, 0.0], dtype=np.float32)
        self.state_derivative = np.array([0.0, 0.0, 0.0], dtype=np.float32)
        self.history = []
        self.time_history = []

        # Target position configuration
        self.random_target = random_target
        self.fixed_target = target_position if target_position is not None else np.array([10.0, 5.0, np.pi], dtype=np.float32)
        self.target_position = np.array([0.0, 0.0, 0.0], dtype=np.float32)

        # Target reaching parameters
        self.position_tolerance = 0.2  # meters
        self.orientation_tolerance = 0.1  # radians
        self.velocity_tolerance = velocity_tolerance  # threshold on ||state_derivative|| for "stopped" detection
        self.target_reached_reward = 100.0

        # Expanded observation space: [x, y, theta, dx, dy, dtheta, time, target_x, target_y, target_theta]
        # theta is wrapped to [0, 2*pi] in step() and targets are sampled in [0, 2*pi),
        # so the angle bounds below follow that convention.
        self.observation_space = spaces.Box(
            low=np.array([-15.0, -10.0, 0.0, -15.0, -10.0, -10.0, 0.0, -15.0, -10.0, 0.0], dtype=np.float32),
            high=np.array([15.0, 10.0, 2*np.pi, 15.0, 10.0, 10.0, 1000.0, 15.0, 10.0, 2*np.pi], dtype=np.float32),
            dtype=np.float32
        )

        self.action_space = self._define_action_space()

        self.render_mode = render_mode
        self.h_fig = None
        self.h_ax = None

        # Default render bounds
        self.render_xlim = (-5, 5)
        self.render_ylim = (-2, 8)

        self.F = self._F()

    def _define_action_space(self):
        return spaces.Box(low=-5.0, high=5.0, shape=(3,), dtype=np.float32)

    @staticmethod
    def _F():
        return np.eye(3)

    @staticmethod
    def _R(theta):
        return np.array([[np.cos(theta), -np.sin(theta), 0.0],
                         [np.sin(theta), np.cos(theta), 0.0],
                         [0, 0, 1]])

    def _generate_target_position(self) -> np.ndarray:
        """Generate a new target position"""
        if self.random_target:
            # Generate random target within bounds, avoiding robot start position
            x = np.random.uniform(-10.0, 10.0)
            y = np.random.uniform(-5.0, 8.0)
            theta = np.random.uniform(0, 2*np.pi)

            # Ensure target is not too close to start position (0,0,0)
            while np.sqrt(x**2 + y**2) < 2.0:
                x = np.random.uniform(-10.0, 10.0)
                y = np.random.uniform(-5.0, 8.0)

            return np.array([x, y, theta], dtype=np.float32)
        else:
            return self.fixed_target.copy()

    def _is_target_reached(self) -> bool:
        """Check if robot has reached the target within tolerance AND is stopped"""
        pos_error = np.abs(self.state[:2] - self.target_position[:2])
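        # mod(delta + pi, 2*pi) - pi wraps the heading difference into [-pi, pi),
        # so the absolute value below is the shortest angular distance to the target.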
        angle_error = np.abs(np.mod(self.state[2] - self.target_position[2] + np.pi, 2*np.pi) - np.pi)
        robot_velocity = np.linalg.norm(self.state_derivative)

        return (pos_error[0] < self.position_tolerance and
                pos_error[1] < self.position_tolerance and
                angle_error < self.orientation_tolerance and
                robot_velocity < self.velocity_tolerance)

    def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]:
        super().reset(seed=seed)  # seed gymnasium's self.np_random per convention
        if seed is not None:
            np.random.seed(seed)  # target generation draws from np.random directly

        self.state = np.array([0.0, 0.0, 0.0], dtype=np.float32)
        self.state_derivative = np.array([0.0, 0.0, 0.0], dtype=np.float32)
        self.current_step = 0
        self.history = [self.state.copy()]
        self.time_history = [0.0]

        # Generate new target position
        self.target_position = self._generate_target_position()

        # Include target in observation: [state, state_derivative, time, target]
        observation = np.concatenate([self.state, self.state_derivative, [self.current_step * self.dt], self.target_position])
        return observation.astype(np.float32), {}

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict[str, Any]]:
        # if isinstance(self.action_space, spaces.Box):
        #     action = np.clip(action, self.action_space.low, self.action_space.high)

        U = action.reshape(-1, 1)
        X = self.state.reshape(-1, 1)

        # U represents wheel velocities from controllers
        # F converts wheel velocities to robot velocities
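        # Forward-Euler update of the pose (comment sketch of the kinematics below):
        #   X_{k+1} = X_k + R(theta_k) @ (F @ U_k) * dt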
        robot_velocities = self.F @ U
        Xp = X + self._R(X[2, 0]) @ robot_velocities * self.dt

        previous_state = self.state.copy()
        self.state = Xp.flatten().astype(np.float32)
        self.state[2] = np.mod(self.state[2], 2*np.pi)  # Wrap theta to [0, 2π]
        self.state_derivative = ((self.state - previous_state) / self.dt).astype(np.float32)
        self.current_step += 1
        self.history.append(self.state.copy())
        self.time_history.append(self.current_step * self.dt)

        reward = self._compute_reward(action)
        terminated = self._is_terminated()
        truncated = self.current_step >= self.max_steps

        observation = np.concatenate([self.state, self.state_derivative, [self.current_step * self.dt], self.target_position])
        return observation.astype(np.float32), reward, terminated, truncated, {}

    def _compute_reward(self, action: np.ndarray) -> float:
        # Base action penalty
        base_reward = -0.01 * np.sum(action**2)

        # Check if target is reached
        position_error = np.linalg.norm(self.state[:2] - self.target_position[:2])
        orientation_error = abs(self.state[2] - self.target_position[2])

        # Handle angle wrapping for orientation error
        orientation_error = min(orientation_error, 2*np.pi - orientation_error)

        # Check if robot is both at target AND stopped
        robot_velocity = np.linalg.norm(self.state_derivative)

        # Target reached bonus (requires position, orientation AND velocity criteria)
        if (position_error < self.position_tolerance and
                orientation_error < self.orientation_tolerance and
                robot_velocity < self.velocity_tolerance):
            return base_reward + self.target_reached_reward

        return base_reward

    def _is_terminated(self) -> bool:
        return self._is_target_reached()

    def set_render_bounds(self, xlim: Tuple[float, float], ylim: Tuple[float, float]) -> None:
        """
        Set the coordinate bounds for rendering.

        Args:
            xlim: Tuple of (x_min, x_max) for x-axis limits
            ylim: Tuple of (y_min, y_max) for y-axis limits
        """
        self.render_xlim = xlim
        self.render_ylim = ylim

    def render(self):
        if self.render_mode == "human":
            self._render_human()
        elif self.render_mode == "rgb_array":
            return self._render_rgb_array()

    def _render_human(self):
        if self.h_fig is None:
            self._create_figure()

        if len(self.history) > 1 and self.h_ax is not None:
            history_array = np.array(self.history)
            time_array = np.array(self.time_history)
            current_time = self.current_step * self.dt

            # Clear all subplots
            for ax in self.h_ax:
                ax.clear()

            # Main trajectory plot (left panel)
            self.h_ax[0].plot(history_array[:, 0], history_array[:, 1], 'b-', alpha=0.7, label='Trajectory')
            self.h_ax[0].plot(self.state[0], self.state[1], 'ro', markersize=8, label='Current Position')

            # Draw target position
            self.h_ax[0].plot(self.target_position[0], self.target_position[1], 'gs', markersize=10, label='Target Position')
            target_tip_points = self._get_robot_axis_points(self.target_position)
            self.h_ax[0].plot(target_tip_points[0, :2], target_tip_points[1, :2], 'g--', linewidth=1, alpha=0.7)
            self.h_ax[0].plot(target_tip_points[0, [0, 2]], target_tip_points[1, [0, 2]], 'g--', linewidth=1, alpha=0.7)

            tip_points = self._get_robot_axis_points(self.state)
            self.h_ax[0].plot(tip_points[0, :2], tip_points[1, :2], 'r-', linewidth=2, label='X-axis')
            self.h_ax[0].plot(tip_points[0, [0, 2]], tip_points[1, [0, 2]], 'g-', linewidth=2, label='Y-axis')

            self.h_ax[0].set_xlim(*self.render_xlim)
            self.h_ax[0].set_ylim(*self.render_ylim)
            self.h_ax[0].set_aspect('equal', adjustable='box')
            self.h_ax[0].grid(True)
            self.h_ax[0].set_xlabel('x [m]')
            self.h_ax[0].set_ylabel('y [m]')
            self.h_ax[0].set_title(f'Robot Position (Time: {current_time:04.2f}s -- Step: {self.current_step:03d})')

            # Time-series plots (right panels)
            labels = ['x [m]', 'y [m]', 'θ [rad]']
            for i, (ax, label) in enumerate(zip(self.h_ax[1:], labels)):
                ax.plot(time_array, history_array[:, i], 'k-', linewidth=1)
                ax.plot(current_time, self.state[i], 'ro', markersize=6)
                ax.set_xlabel('Time [s]')
                ax.set_ylabel(label)
                ax.grid(True)
                ax.set_xlim(0, max(current_time + 1, 5))

                # Fixed y-axis ranges for time-series plots
                if i == 0:  # x position
                    ax.set_ylim(*self.render_xlim)  # Use same range as main plot
                elif i == 1:  # y position
                    ax.set_ylim(*self.render_ylim)  # Use same range as main plot
                elif i == 2:  # theta angle
                    ax.set_ylim(0, 2*np.pi)  # Fixed range for full rotation

            plt.tight_layout()
            plt.pause(self.dt)

    def _render_rgb_array(self):
        return None

    def _create_figure(self):
        self.h_fig = plt.figure(figsize=(10, 5.5))

        # Create multi-panel layout matching original
        self.h_ax = [
            self.h_fig.add_subplot(1, 2, 1),  # Main trajectory plot
            self.h_fig.add_subplot(3, 2, 2),  # x vs time
            self.h_fig.add_subplot(3, 2, 4),  # y vs time
            self.h_fig.add_subplot(3, 2, 6)   # theta vs time
        ]

        # Configure main trajectory plot
        self.h_ax[0].set_xlabel('x [m]')
        self.h_ax[0].set_ylabel('y [m]')
        self.h_ax[0].set_title('Wheeled Robot')
        self.h_ax[0].axis('equal')
        self.h_ax[0].grid(True)
        self.h_ax[0].set_xlim(*self.render_xlim)
        self.h_ax[0].set_ylim(*self.render_ylim)

        # Configure time-series plots
        labels = ['x [m]', 'y [m]', 'θ [rad]']
        for i, (ax, label) in enumerate(zip(self.h_ax[1:], labels)):
            ax.set_xlabel('Time [s]')
            ax.set_ylabel(label)
            ax.grid(True)

        plt.tight_layout()
        plt.ion()

    def _get_robot_axis_points(self, state):
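        # Returns a 2x3 array: column 0 is the robot origin, columns 1 and 2 are the
        # tips of the body x- and y-axes (length 0.7) used to draw the robot frame.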
        tip_points = np.zeros((2, 3), dtype=float)
        tip_points[:, 0] = state[:2]
        Rxy = self._R(state[2])[:2, :2]
        tip_points[:, [1]] = Rxy @ np.array([[0.7], [0]]) + tip_points[:, [0]]
        tip_points[:, [2]] = Rxy @ np.array([[0], [0.7]]) + tip_points[:, [0]]
        return tip_points

    def close(self):
        if self.h_fig is not None:
            plt.close(self.h_fig)
            self.h_fig = None
            self.h_ax = None


class OmniRobotEnv(WheeledRobotEnv):
    """
    Omnidirectional robot environment using three-wheel omnidirectional drive.

    This environment simulates a robot with three omnidirectional wheels arranged
    at 120-degree intervals. The robot can move in any direction and rotate
    simultaneously without changing orientation first.

    The action space consists of 3 wheel velocities, which are converted to
    robot motion through the kinematic transformation matrix F.

    Wheel configuration:
        - Wheel 1: 0° (front)
        - Wheel 2: 120° (rear left)
        - Wheel 3: 240° (rear right)
        - All wheels at distance l=0.5 from center
        - Wheel radius r=0.1

    Action space: Box(3,) representing wheel velocities [-100.0, 100.0]
    """

    def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05):
        super().__init__(render_mode, random_target, target_position, velocity_tolerance)

    def _define_action_space(self):
        return spaces.Box(low=-100.0, high=100.0, shape=(3,), dtype=np.float32)

    @staticmethod
    def _F():
        a1 = 0.0; b1 = 0.0; l1 = 0.5
        a2 = (2/3)*np.pi; b2 = 0.0; l2 = 0.5
        a3 = (4/3)*np.pi; b3 = 0.0; l3 = 0.5
        r = 0.1
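        # Each row of J is the rolling constraint of one omni wheel, mapping
        # robot-frame velocities [vx, vy, omega] to that wheel's speed;
        # F = J^{-1} converts commanded wheel speeds back to robot velocities.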

        J = np.array([
            [1/r*np.sin(a1+b1), -1/r*np.cos(a1+b1), -l1/r*np.cos(b1)],
            [1/r*np.sin(a2+b2), -1/r*np.cos(a2+b2), -l2/r*np.cos(b2)],
            [1/r*np.sin(a3+b3), -1/r*np.cos(a3+b3), -l3/r*np.cos(b3)]
        ])
        F = np.linalg.inv(J)

        return F


class MecanumRobotEnv(WheeledRobotEnv):
    """
    Mecanum wheel robot environment using four-wheel mecanum drive.

    This environment simulates a robot with four mecanum wheels arranged in a
    rectangular configuration. Each wheel has rollers at 45° angles, allowing
    the robot to move in any direction and rotate simultaneously.

    The action space consists of 4 wheel velocities for the mecanum wheels,
    which are converted to robot motion through the kinematic transformation
    matrix F using the Moore-Penrose pseudoinverse.

    Wheel configuration:
        - Front-left wheel: position (+0.5, +0.5), roller direction (+1, +1)/√2
        - Front-right wheel: position (-0.5, +0.5), roller direction (-1, +1)/√2
        - Rear-left wheel: position (-0.5, -0.5), roller direction (-1, -1)/√2
        - Rear-right wheel: position (+0.5, -0.5), roller direction (+1, -1)/√2
        - All wheels have radius r=0.1

    Action space: Box(4,) representing wheel velocities [-160.0, 160.0]
    """

    def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05):
        super().__init__(render_mode, random_target, target_position, velocity_tolerance)

    def _define_action_space(self):
        return spaces.Box(low=-160.0, high=160.0, shape=(4,), dtype=np.float32)

    @staticmethod
    def _F():
        l1 = np.array([+0.5, +0.5]); f1 = np.array([+1, +1])/np.sqrt(2); d1 = np.array([+1, 0])
        l2 = np.array([-0.5, +0.5]); f2 = np.array([-1, +1])/np.sqrt(2); d2 = np.array([+1, 0])
        l3 = np.array([-0.5, -0.5]); f3 = np.array([-1, -1])/np.sqrt(2); d3 = np.array([-1, 0])
        l4 = np.array([+0.5, -0.5]); f4 = np.array([+1, -1])/np.sqrt(2); d4 = np.array([-1, 0])
        r = 0.1
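        # l_i: wheel position, f_i: roller axis direction, d_i: wheel drive direction.
        # Each row of J encodes one mecanum wheel's rolling constraint on
        # [vx, vy, omega]; the Moore-Penrose pseudoinverse below recovers robot
        # velocities from the four commanded wheel speeds.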

        J11 = f1[1]/(r*d1[0]*f1[1]-r*d1[1]*f1[0])
        J21 = f2[1]/(r*d2[0]*f2[1]-r*d2[1]*f2[0])
        J31 = f3[1]/(r*d3[0]*f3[1]-r*d3[1]*f3[0])
        J41 = f4[1]/(r*d4[0]*f4[1]-r*d4[1]*f4[0])
        J12 = -f1[0]/(r*d1[0]*f1[1]-r*d1[1]*f1[0])
        J22 = -f2[0]/(r*d2[0]*f2[1]-r*d2[1]*f2[0])
        J32 = -f3[0]/(r*d3[0]*f3[1]-r*d3[1]*f3[0])
        J42 = -f4[0]/(r*d4[0]*f4[1]-r*d4[1]*f4[0])
        J13 = (-f1[1]*l1[1]-f1[0]*l1[0])/(r*d1[0]*f1[1]-r*d1[1]*f1[0])
        J23 = (-f2[1]*l2[1]-f2[0]*l2[0])/(r*d2[0]*f2[1]-r*d2[1]*f2[0])
        J33 = (-f3[1]*l3[1]-f3[0]*l3[0])/(r*d3[0]*f3[1]-r*d3[1]*f3[0])
        J43 = (-f4[1]*l4[1]-f4[0]*l4[0])/(r*d4[0]*f4[1]-r*d4[1]*f4[0])
        J = np.array([[J11, J12, J13], [J21, J22, J23], [J31, J32, J33], [J41, J42, J43]])

        F = np.linalg.pinv(J)
        return F


class TankRobotEnv(WheeledRobotEnv):
    """
    Tank-drive (differential drive) robot environment using two wheels.

    This environment simulates a robot with two independently controlled wheels
    on opposite sides. The robot moves forward/backward when both wheels rotate
    in the same direction, and turns when wheels rotate at different speeds.

    The robot cannot move sideways and must rotate to change direction.
    The action space consists of 2 wheel velocities, which are converted to
    robot motion through the kinematic transformation matrix F using the
    Moore-Penrose pseudoinverse.

    Wheel configuration:
        - Left wheel: contributes to forward motion and rotation
        - Right wheel: contributes to forward motion and rotation
        - Wheel base b=1.0 (distance between wheels)
        - Wheel radius r=0.1

    Action space: Box(2,) representing left and right wheel velocities [-100.0, 100.0]
    """

    def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05):
        super().__init__(render_mode, random_target, target_position, velocity_tolerance)

    def _define_action_space(self):
        return spaces.Box(low=-100.0, high=100.0, shape=(2,), dtype=np.float32)

    @staticmethod
    def _F():
        b = 1
        r = 0.1
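        # Differential drive: each row of J maps [vx, vy, omega] to one wheel speed,
        # with no lateral (vy) actuation; F = pinv(J) gives the least-squares map
        # from the two wheel speeds back to robot velocities.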

        J = np.array([
            [+1/r, 0, -b/(2*r)],
            [-1/r, 0, -b/(2*r)]
        ])
        F = np.linalg.pinv(J)

        return F


class AckermanRobotEnv(WheeledRobotEnv):
    """
    Ackermann steering robot environment using bicycle kinematics.

    This environment simulates a car-like robot with Ackermann steering geometry.
    The robot has a front steering wheel and rear driving wheel, similar to a
    bicycle model. It moves forward/backward and steers by changing the front
    wheel angle.

    Unlike other robot types, this class overrides the step() method to implement
    bicycle kinematics instead of using the standard F matrix approach.

    Vehicle parameters:
        - Wheel radius r=0.1
        - Uses bicycle kinematics model
        - Steering angle affects turning radius

    Action space: Box(2,) with:
        - action[0]: driving velocity [-10.0, 10.0]
        - action[1]: steering angle [-1.0, 1.0] radians

    The robot cannot move sideways and follows curved paths based on the
    steering angle and forward velocity.
    """

    def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05):
        super().__init__(render_mode, random_target, target_position, velocity_tolerance)

    def _define_action_space(self):
        return spaces.Box(low=np.array([-10.0, -1.0]), high=np.array([10.0, 1.0]), dtype=np.float32)

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict[str, Any]]:
        # if isinstance(self.action_space, spaces.Box):
        #     action = np.clip(action, self.action_space.low, self.action_space.high)

        r = 0.1
        dU = action.reshape(-1, 1)
        X = self.state.reshape(-1, 1)
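        # Body-frame velocity under this simplified car-like model: forward speed is
        # r * action[0]; the heading rate is the forward speed divided by action[1]
        # (so the steering command acts like a turning-radius-style input), and a
        # zero steering command keeps the heading fixed.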

        if dU[1, 0] != 0:
            dX = np.array([[r*dU[0, 0]], [0], [r*dU[0, 0]/dU[1, 0]]])
        else:
            dX = np.array([[r*dU[0, 0]], [0], [0]])

        Xp = X + self._R(X[2, 0]) @ dX * self.dt

        previous_state = self.state.copy()
        self.state = Xp.flatten().astype(np.float32)
        self.state[2] = np.mod(self.state[2], 2*np.pi)  # Wrap theta to [0, 2π]
        self.state_derivative = ((self.state - previous_state) / self.dt).astype(np.float32)
        self.current_step += 1
        self.history.append(self.state.copy())
        self.time_history.append(self.current_step * self.dt)

        reward = self._compute_reward(action)
        terminated = self._is_terminated()
        truncated = self.current_step >= self.max_steps

        observation = np.concatenate([self.state, self.state_derivative, [self.current_step * self.dt], self.target_position])
        return observation.astype(np.float32), reward, terminated, truncated, {}
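

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only): drive an OmniRobotEnv with random
    # actions and watch the live plot. Assumes an interactive matplotlib backend
    # is available for render_mode="human".
    env = OmniRobotEnv(render_mode="human", random_target=True)
    obs, info = env.reset(seed=0)
    for _ in range(100):
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        env.render()
        if terminated or truncated:
            break
    env.close()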