lawarob-wheeled/WheeledRobot.py
2025-10-01 16:39:25 +02:00

537 lines
23 KiB
Python
Executable File

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import gymnasium as gym
from gymnasium import spaces
from typing import Any, Dict, Optional, Tuple
class WheeledRobotEnv(gym.Env):
"""
Base gymnasium environment for wheeled robot simulation.
This environment provides a foundation for simulating various types of wheeled robots
in a 2D plane. The robot state consists of (x, y, theta) representing position and
orientation. The environment supports different rendering modes and maintains a
history of robot positions for visualization.
Attributes:
dt (float): Time step for simulation (default: 0.05)
max_steps (int): Maximum number of simulation steps (default: 500)
state (np.ndarray): Current robot state [x, y, theta]
history (list): List of historical states for trajectory visualization
observation_space (spaces.Box): Valid observation space bounds
action_space (spaces.Space): Valid action space (defined by subclasses)
render_mode (str): Rendering mode ("human", "rgb_array", or None)
The coordinate system uses:
- x: horizontal position
- y: vertical position
- theta: orientation angle in radians
"""
metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 20}
def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05, speed_limit_enabled: bool = False):
super().__init__()
self.dt = 0.05
self.max_steps = 400
self.current_step = 0
self.state = np.array([0.0, 0.0, 0.0], dtype=np.float32)
self.state_derivative = np.array([0.0, 0.0, 0.0], dtype=np.float32)
self.history = []
self.time_history = []
# Target position configuration
self.random_target = random_target
self.fixed_target = target_position if target_position is not None else np.array([10.0, 5.0, np.pi], dtype=np.float32)
self.target_position = np.array([0.0, 0.0, 0.0], dtype=np.float32)
# Target reaching parameters
self.position_tolerance = 0.2 # meters
self.orientation_tolerance = 0.1 # radians
self.velocity_tolerance = velocity_tolerance # units/step for "stopped" detection
self.target_reached_reward = 100.0
# Speed limiting parameters
self.speed_limit_enabled = speed_limit_enabled
self._wheel_speed_limits = None # To be set by subclasses
# Expanded observation space: [x, y, theta, dx, dy, dtheta, time, target_x, target_y, target_theta]
self.observation_space = spaces.Box(
low=np.array([-15.0, -10.0, -np.pi, -15.0, -10.0, -10.0, 0.0, -15.0, -10.0, -np.pi], dtype=np.float32),
high=np.array([15.0, 10.0, np.pi, 15.0, 10.0, 10.0, 1000.0, 15.0, 10.0, np.pi], dtype=np.float32),
dtype=np.float32
)
self.action_space = self._define_action_space()
self.render_mode = render_mode
self.h_fig = None
self.h_ax = None
# Default render bounds
self.render_xlim = (-5, 5)
self.render_ylim = (-2, 8)
self.F = self._F()
def _apply_wheel_speed_limits(self, wheel_speeds: np.ndarray) -> np.ndarray:
"""Apply wheel speed limits if enabled"""
if not self.speed_limit_enabled or self._wheel_speed_limits is None:
return wheel_speeds
# Apply symmetric limits to each wheel
limited_speeds = np.clip(wheel_speeds.flatten(),
-self._wheel_speed_limits,
self._wheel_speed_limits)
return limited_speeds.reshape(-1, 1)
def _define_action_space(self):
return spaces.Box(low=-5.0, high=5.0, shape=(3,), dtype=np.float32)
@staticmethod
def _F():
return np.eye(3)
@staticmethod
def _R(theta):
return np.array([[np.cos(theta), -np.sin(theta), 0.0],
[np.sin(theta), np.cos(theta), 0.0],
[0, 0, 1]])
def _generate_target_position(self) -> np.ndarray:
"""Generate a new target position"""
if self.random_target:
# Generate random target within bounds, avoiding robot start position
x = np.random.uniform(-10.0, 10.0)
y = np.random.uniform(-5.0, 8.0)
theta = np.random.uniform(0, 2*np.pi)
# Ensure target is not too close to start position (0,0,0)
while np.sqrt(x**2 + y**2) < 2.0:
x = np.random.uniform(-10.0, 10.0)
y = np.random.uniform(-5.0, 8.0)
return np.array([x, y, theta], dtype=np.float32)
else:
return self.fixed_target.copy()
def _is_target_reached(self) -> bool:
"""Check if robot has reached the target within tolerance AND is stopped"""
pos_error = np.abs(self.state[:2] - self.target_position[:2])
angle_error = np.abs(np.mod(self.state[2] - self.target_position[2] + np.pi, 2*np.pi) - np.pi)
robot_velocity = np.linalg.norm(self.state_derivative)
return bool((pos_error[0] < self.position_tolerance) and
(pos_error[1] < self.position_tolerance) and
(angle_error < self.orientation_tolerance) and
(robot_velocity < self.velocity_tolerance))
def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]:
if seed is not None:
np.random.seed(seed)
self.state = np.array([0.0, 0.0, 0.0], dtype=np.float32)
self.state_derivative = np.array([0.0, 0.0, 0.0], dtype=np.float32)
self.current_step = 0
self.history = [self.state.copy()]
self.time_history = [0.0]
# Generate new target position
self.target_position = self._generate_target_position()
# Include target in observation: [state, state_derivative, time, target]
observation = np.concatenate([self.state, self.state_derivative, [self.current_step * self.dt], self.target_position])
return observation.astype(np.float32), {}
def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict[str, Any]]:
# if isinstance(self.action_space, spaces.Box):
# action = np.clip(action, self.action_space.low, self.action_space.high)
U = action.reshape(-1, 1)
X = self.state.reshape(-1, 1)
# Apply wheel speed limits before kinematic conversion
U = self._apply_wheel_speed_limits(U)
# U represents wheel velocities from controllers
# F converts wheel velocities to robot velocities
robot_velocities = self.F @ U
Xp = X + self._R(X[2, 0]) @ robot_velocities * self.dt
previous_state = self.state.copy()
self.state = Xp.flatten().astype(np.float32)
self.state[2] = np.mod(self.state[2], 2*np.pi) # Wrap theta to [0, 2π]
self.state_derivative = ((self.state - previous_state) / self.dt).astype(np.float32)
self.current_step += 1
self.history.append(self.state.copy())
self.time_history.append(self.current_step * self.dt)
reward = self._compute_reward(action)
terminated = self._is_terminated()
truncated = self.current_step >= self.max_steps
observation = np.concatenate([self.state, self.state_derivative, [self.current_step * self.dt], self.target_position])
return observation.astype(np.float32), reward, terminated, truncated, {}
def _compute_reward(self, action: np.ndarray) -> float:
# Base action penalty
base_reward = -0.01 * np.sum(action**2)
# Check if target is reached
position_error = np.linalg.norm(self.state[:2] - self.target_position[:2])
orientation_error = abs(self.state[2] - self.target_position[2])
# Handle angle wrapping for orientation error
orientation_error = min(orientation_error, 2*np.pi - orientation_error)
# Check if robot is both at target AND stopped
robot_velocity = np.linalg.norm(self.state_derivative)
# Target reached bonus (requires position, orientation AND velocity criteria)
if (position_error < self.position_tolerance and
orientation_error < self.orientation_tolerance and
robot_velocity < self.velocity_tolerance):
return base_reward + self.target_reached_reward
return base_reward
def _is_terminated(self) -> bool:
return self._is_target_reached()
def set_render_bounds(self, xlim: Tuple[float, float], ylim: Tuple[float, float]) -> None:
"""
Set the coordinate bounds for rendering.
Args:
xlim: Tuple of (x_min, x_max) for x-axis limits
ylim: Tuple of (y_min, y_max) for y-axis limits
"""
self.render_xlim = xlim
self.render_ylim = ylim
def render(self):
if self.render_mode == "human":
self._render_human()
elif self.render_mode == "rgb_array":
return self._render_rgb_array()
def _render_human(self):
if self.h_fig is None:
self._create_figure()
if len(self.history) > 1 and self.h_ax is not None:
history_array = np.array(self.history)
time_array = np.array(self.time_history)
current_time = self.current_step * self.dt
# Clear all subplots
for ax in self.h_ax:
ax.clear()
# Main trajectory plot (left panel)
self.h_ax[0].plot(history_array[:, 0], history_array[:, 1], 'b-', alpha=0.7, label='Trajectory')
self.h_ax[0].plot(self.state[0], self.state[1], 'ro', markersize=8, label='Current Position')
# Draw target position
self.h_ax[0].plot(self.target_position[0], self.target_position[1], 'gs', markersize=10, label='Target Position')
target_tip_points = self._get_robot_axis_points(self.target_position)
self.h_ax[0].plot(target_tip_points[0, :2], target_tip_points[1, :2], 'g--', linewidth=1, alpha=0.7)
self.h_ax[0].plot(target_tip_points[0, [0, 2]], target_tip_points[1, [0, 2]], 'g--', linewidth=1, alpha=0.7)
tip_points = self._get_robot_axis_points(self.state)
self.h_ax[0].plot(tip_points[0, :2], tip_points[1, :2], 'r-', linewidth=2, label='X-axis')
self.h_ax[0].plot(tip_points[0, [0, 2]], tip_points[1, [0, 2]], 'g-', linewidth=2, label='Y-axis')
self.h_ax[0].set_xlim(*self.render_xlim)
self.h_ax[0].set_ylim(*self.render_ylim)
self.h_ax[0].set_aspect('equal', adjustable='box')
self.h_ax[0].grid(True)
self.h_ax[0].set_xlabel('x [m]')
self.h_ax[0].set_ylabel('y [m]')
self.h_ax[0].set_title(f'Robot Position (Time: {current_time:04.2f}s -- Step: {self.current_step:03d})')
# Time-series plots (right panels)
labels = ['x [m]', 'y [m]', 'θ [rad]']
for i, (ax, label) in enumerate(zip(self.h_ax[1:], labels)):
ax.plot(time_array, history_array[:, i], 'k-', linewidth=1)
ax.plot(current_time, self.state[i], 'ro', markersize=6)
ax.set_xlabel('Time [s]')
ax.set_ylabel(label)
ax.grid(True)
ax.set_xlim(0, max(current_time + 1, 5))
# Fixed y-axis ranges for time-series plots
if i == 0: # x position
ax.set_ylim(*self.render_xlim) # Use same range as main plot
elif i == 1: # y position
ax.set_ylim(*self.render_ylim) # Use same range as main plot
elif i == 2: # theta angle
ax.set_ylim(0, 2*np.pi) # Fixed range for full rotation
plt.tight_layout()
plt.pause(self.dt)
def _render_rgb_array(self):
return None
def _create_figure(self):
self.h_fig = plt.figure(figsize=(10, 5.5))
# Create multi-panel layout matching original
self.h_ax = [
self.h_fig.add_subplot(1, 2, 1), # Main trajectory plot
self.h_fig.add_subplot(3, 2, 2), # x vs time
self.h_fig.add_subplot(3, 2, 4), # y vs time
self.h_fig.add_subplot(3, 2, 6) # theta vs time
]
# Configure main trajectory plot
self.h_ax[0].set_xlabel('x [m]')
self.h_ax[0].set_ylabel('y [m]')
self.h_ax[0].set_title('Wheeled Robot')
self.h_ax[0].axis('equal')
self.h_ax[0].grid(True)
self.h_ax[0].set_xlim(*self.render_xlim)
self.h_ax[0].set_ylim(*self.render_ylim)
# Configure time-series plots
labels = ['x [m]', 'y [m]', 'θ [rad]']
for i, (ax, label) in enumerate(zip(self.h_ax[1:], labels)):
ax.set_xlabel('Time [s]')
ax.set_ylabel(label)
ax.grid(True)
plt.tight_layout()
plt.ion()
def _get_robot_axis_points(self, state):
tip_points = np.zeros((2, 3), dtype=float)
tip_points[:, 0] = state[:2]
Rxy = self._R(state[2])[:2, :2]
tip_points[:, [1]] = Rxy @ np.array([[0.7], [0]]) + tip_points[:, [0]]
tip_points[:, [2]] = Rxy @ np.array([[0], [0.7]]) + tip_points[:, [0]]
return tip_points
def close(self):
if self.h_fig is not None:
plt.close(self.h_fig)
self.h_fig = None
self.h_ax = None
class OmniRobotEnv(WheeledRobotEnv):
"""
Omnidirectional robot environment using three-wheel omnidirectional drive.
This environment simulates a robot with three omnidirectional wheels arranged
at 120-degree intervals. The robot can move in any direction and rotate
simultaneously without changing orientation first.
The action space consists of 3 wheel velocities, which are converted to
robot motion through the kinematic transformation matrix F.
Wheel configuration:
- Wheel 1: 0° (front)
- Wheel 2: 120° (rear left)
- Wheel 3: 240° (rear right)
- All wheels at distance l=0.5 from center
- Wheel radius r=0.1
Action space: Box(3,) representing wheel velocities [-100.0, 100.0]
"""
def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05, speed_limit_enabled: bool = False):
super().__init__(render_mode, random_target, target_position, velocity_tolerance, speed_limit_enabled)
self._wheel_speed_limits = np.array([70.0, 70.0, 70.0])
def _define_action_space(self):
return spaces.Box(low=-100.0, high=100.0, shape=(3,), dtype=np.float32)
@staticmethod
def _F():
a1 = 0.0; b1 = 0.0; l1 = 0.5
a2 = (2/3)*np.pi; b2 = 0.0; l2 = 0.5
a3 = (4/3)*np.pi; b3 = 0.0; l3 = 0.5
r = 0.1
J = np.array([
[1/r*np.sin(a1+b1), -1/r*np.cos(a1+b1), -l1/r*np.cos(b1)],
[1/r*np.sin(a2+b2), -1/r*np.cos(a2+b2), -l2/r*np.cos(b2)],
[1/r*np.sin(a3+b3), -1/r*np.cos(a3+b3), -l3/r*np.cos(b3)]
])
F = np.linalg.inv(J)
return F
class MecanumRobotEnv(WheeledRobotEnv):
"""
Mecanum wheel robot environment using four-wheel mecanum drive.
This environment simulates a robot with four mecanum wheels arranged in a
rectangular configuration. Each wheel has rollers at 45° angles, allowing
the robot to move in any direction and rotate simultaneously.
The action space consists of 4 wheel velocities for the mecanum wheels,
which are converted to robot motion through the kinematic transformation
matrix F using the Moore-Penrose pseudoinverse.
Wheel configuration:
- Front-left wheel: position (+0.5, +0.5), roller direction (+1, +1)/√2
- Front-right wheel: position (-0.5, +0.5), roller direction (-1, +1)/√2
- Rear-left wheel: position (-0.5, -0.5), roller direction (-1, -1)/√2
- Rear-right wheel: position (+0.5, -0.5), roller direction (+1, -1)/√2
- All wheels have radius r=0.1
Action space: Box(4,) representing wheel velocities [-160.0, 160.0]
"""
def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05, speed_limit_enabled: bool = False):
super().__init__(render_mode, random_target, target_position, velocity_tolerance, speed_limit_enabled)
self._wheel_speed_limits = np.array([40.0, 40.0, 40.0, 40.0])
def _define_action_space(self):
return spaces.Box(low=-160.0, high=160.0, shape=(4,), dtype=np.float32)
@staticmethod
def _F():
l1 = np.array([+0.5, +0.5]); f1 = np.array([+1, +1])/np.sqrt(2); d1 = np.array([+1, 0])
l2 = np.array([-0.5, +0.5]); f2 = np.array([-1, +1])/np.sqrt(2); d2 = np.array([+1, 0])
l3 = np.array([-0.5, -0.5]); f3 = np.array([-1, -1])/np.sqrt(2); d3 = np.array([-1, 0])
l4 = np.array([+0.5, -0.5]); f4 = np.array([+1, -1])/np.sqrt(2); d4 = np.array([-1, 0])
r = 0.1
J11 = f1[1]/(r*d1[0]*f1[1]-r*d1[1]*f1[0])
J21 = f2[1]/(r*d2[0]*f2[1]-r*d2[1]*f2[0])
J31 = f3[1]/(r*d3[0]*f3[1]-r*d3[1]*f3[0])
J41 = f4[1]/(r*d4[0]*f4[1]-r*d4[1]*f4[0])
J12 = -f1[0]/(r*d1[0]*f1[1]-r*d1[1]*f1[0])
J22 = -f2[0]/(r*d2[0]*f2[1]-r*d2[1]*f2[0])
J32 = -f3[0]/(r*d3[0]*f3[1]-r*d3[1]*f3[0])
J42 = -f4[0]/(r*d4[0]*f4[1]-r*d4[1]*f4[0])
J13 = (-f1[1]*l1[1]-f1[0]*l1[0])/(r*d1[0]*f1[1]-r*d1[1]*f1[0])
J23 = (-f2[1]*l2[1]-f2[0]*l2[0])/(r*d2[0]*f2[1]-r*d2[1]*f2[0])
J33 = (-f3[1]*l3[1]-f3[0]*l3[0])/(r*d3[0]*f3[1]-r*d3[1]*f3[0])
J43 = (-f4[1]*l4[1]-f4[0]*l4[0])/(r*d4[0]*f4[1]-r*d4[1]*f4[0])
J = np.array([[J11, J12, J13], [J21, J22, J23], [J31, J32, J33], [J41, J42, J43]])
F = np.linalg.pinv(J)
return F
class TankRobotEnv(WheeledRobotEnv):
"""
Tank-drive (differential drive) robot environment using two wheels.
This environment simulates a robot with two independently controlled wheels
on opposite sides. The robot moves forward/backward when both wheels rotate
in the same direction, and turns when wheels rotate at different speeds.
The robot cannot move sideways and must rotate to change direction.
The action space consists of 2 wheel velocities, which are converted to
robot motion through the kinematic transformation matrix F using the
Moore-Penrose pseudoinverse.
Wheel configuration:
- Left wheel: contributes to forward motion and rotation
- Right wheel: contributes to forward motion and rotation
- Wheel base b=1.0 (distance between wheels)
- Wheel radius r=0.1
Action space: Box(2,) representing left and right wheel velocities [-100.0, 100.0]
"""
def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05, speed_limit_enabled: bool = False):
super().__init__(render_mode, random_target, target_position, velocity_tolerance, speed_limit_enabled)
self._wheel_speed_limits = np.array([50.0, 50.0])
def _define_action_space(self):
return spaces.Box(low=-100.0, high=100.0, shape=(2,), dtype=np.float32)
@staticmethod
def _F():
b = 1
r = 0.1
J = np.array([
[+1/r, 0, -b/(2*r)],
[-1/r, 0, -b/(2*r)]
])
F = np.linalg.pinv(J)
return F
class AckermanRobotEnv(WheeledRobotEnv):
"""
Ackermann steering robot environment using bicycle kinematics.
This environment simulates a car-like robot with Ackermann steering geometry.
The robot has a front steering wheel and rear driving wheel, similar to a
bicycle model. It moves forward/backward and steers by changing the front
wheel angle.
Unlike other robot types, this class overrides the step() method to implement
bicycle kinematics instead of using the standard F matrix approach.
Vehicle parameters:
- Wheel radius r=0.1
- Uses bicycle kinematics model
- Steering angle affects turning radius
Action space: Box(2,) with:
- action[0]: driving velocity [-5.0, 5.0]
- action[1]: steering angle [-1.0, 1.0] radians
The robot cannot move sideways and follows curved paths based on the
steering angle and forward velocity.
"""
def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05, speed_limit_enabled: bool = False):
super().__init__(render_mode, random_target, target_position, velocity_tolerance, speed_limit_enabled)
# Set hardcoded limits with 10% margin above max observed (velocity: 1.3, steering: 0.6)
self._wheel_speed_limits = np.array([4, 2])
def _define_action_space(self):
return spaces.Box(low=np.array([-10.0, -1.0]), high=np.array([10.0, 1.0]), dtype=np.float32)
def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict[str, Any]]:
# if isinstance(self.action_space, spaces.Box):
# action = np.clip(action, self.action_space.low, self.action_space.high)
# Apply speed limits for Ackerman robot (velocity, steering angle)
action = self._apply_wheel_speed_limits(action.reshape(-1, 1)).flatten()
r = 0.1
dU = action.reshape(-1, 1)
X = self.state.reshape(-1, 1)
if dU[1, 0] != 0:
dX = np.array([[r*dU[0,0]], [0], [r*dU[0,0]/dU[1,0]]])
else:
dX = np.array([[r*dU[0,0]], [0], [0]])
Xp = X + self._R(X[2, 0]) @ dX * self.dt
previous_state = self.state.copy()
self.state = Xp.flatten().astype(np.float32)
self.state[2] = np.mod(self.state[2], 2*np.pi) # Wrap theta to [0, 2π]
self.state_derivative = ((self.state - previous_state) / self.dt).astype(np.float32)
self.current_step += 1
self.history.append(self.state.copy())
self.time_history.append(self.current_step * self.dt)
reward = self._compute_reward(action)
terminated = self._is_terminated()
truncated = self.current_step >= self.max_steps
observation = np.concatenate([self.state, self.state_derivative, [self.current_step * self.dt], self.target_position])
return observation.astype(np.float32), reward, terminated, truncated, {}