Added the WheeledRobot env
parent 55bb2734d0
commit 00e5a5e84e
510
WheeledRobot.py
Executable file
@@ -0,0 +1,510 @@
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import gymnasium as gym
from gymnasium import spaces
from typing import Any, Dict, Optional, Tuple


class WheeledRobotEnv(gym.Env):
    """
    Base gymnasium environment for wheeled robot simulation.

    This environment provides a foundation for simulating various types of wheeled robots
    in a 2D plane. The robot state consists of (x, y, theta) representing position and
    orientation. The environment supports different rendering modes and maintains a
    history of robot positions for visualization.

    Attributes:
        dt (float): Time step for simulation (default: 0.05)
        max_steps (int): Maximum number of simulation steps (default: 400)
        state (np.ndarray): Current robot state [x, y, theta]
        history (list): List of historical states for trajectory visualization
        observation_space (spaces.Box): Valid observation space bounds
        action_space (spaces.Space): Valid action space (defined by subclasses)
        render_mode (str): Rendering mode ("human", "rgb_array", or None)

    The coordinate system uses:
        - x: horizontal position
        - y: vertical position
        - theta: orientation angle in radians
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 20}

    def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05):
        super().__init__()

        self.dt = 0.05
        self.max_steps = 400
        self.current_step = 0

        self.state = np.array([0.0, 0.0, 0.0], dtype=np.float32)
        self.state_derivative = np.array([0.0, 0.0, 0.0], dtype=np.float32)
        self.history = []
        self.time_history = []

        # Target position configuration
        self.random_target = random_target
        self.fixed_target = target_position if target_position is not None else np.array([10.0, 5.0, np.pi], dtype=np.float32)
        self.target_position = np.array([0.0, 0.0, 0.0], dtype=np.float32)

        # Target reaching parameters
        self.position_tolerance = 0.2  # meters
        self.orientation_tolerance = 0.1  # radians
        self.velocity_tolerance = velocity_tolerance  # threshold on ||state_derivative|| for "stopped" detection
        self.target_reached_reward = 100.0

        # Expanded observation space: [x, y, theta, dx, dy, dtheta, time, target_x, target_y, target_theta]
        # theta is wrapped to [0, 2*pi] in step() and targets are sampled in [0, 2*pi),
        # so the angle bounds below follow that convention.
        self.observation_space = spaces.Box(
            low=np.array([-15.0, -10.0, 0.0, -15.0, -10.0, -10.0, 0.0, -15.0, -10.0, 0.0], dtype=np.float32),
            high=np.array([15.0, 10.0, 2*np.pi, 15.0, 10.0, 10.0, 1000.0, 15.0, 10.0, 2*np.pi], dtype=np.float32),
            dtype=np.float32
        )

        self.action_space = self._define_action_space()

        self.render_mode = render_mode
        self.h_fig = None
        self.h_ax = None

        # Default render bounds
        self.render_xlim = (-5, 5)
        self.render_ylim = (-2, 8)

        self.F = self._F()

    def _define_action_space(self):
        return spaces.Box(low=-5.0, high=5.0, shape=(3,), dtype=np.float32)

    @staticmethod
    def _F():
        return np.eye(3)

    @staticmethod
    def _R(theta):
        return np.array([[np.cos(theta), -np.sin(theta), 0.0],
                         [np.sin(theta), np.cos(theta), 0.0],
                         [0, 0, 1]])

    def _generate_target_position(self) -> np.ndarray:
        """Generate a new target position"""
        if self.random_target:
            # Generate random target within bounds, avoiding robot start position
            x = np.random.uniform(-10.0, 10.0)
            y = np.random.uniform(-5.0, 8.0)
            theta = np.random.uniform(0, 2*np.pi)

            # Ensure target is not too close to start position (0,0,0)
            while np.sqrt(x**2 + y**2) < 2.0:
                x = np.random.uniform(-10.0, 10.0)
                y = np.random.uniform(-5.0, 8.0)

            return np.array([x, y, theta], dtype=np.float32)
        else:
            return self.fixed_target.copy()

    def _is_target_reached(self) -> bool:
        """Check if robot has reached the target within tolerance AND is stopped"""
        pos_error = np.abs(self.state[:2] - self.target_position[:2])
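        # mod(delta + pi, 2*pi) - pi wraps the heading difference into [-pi, pi),
        # so the absolute value below is the shortest angular distance to the target.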
        angle_error = np.abs(np.mod(self.state[2] - self.target_position[2] + np.pi, 2*np.pi) - np.pi)
        robot_velocity = np.linalg.norm(self.state_derivative)

        return (pos_error[0] < self.position_tolerance and
                pos_error[1] < self.position_tolerance and
                angle_error < self.orientation_tolerance and
                robot_velocity < self.velocity_tolerance)

    def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]:
        super().reset(seed=seed)  # seed gymnasium's self.np_random per convention
        if seed is not None:
            np.random.seed(seed)  # target generation draws from np.random directly

        self.state = np.array([0.0, 0.0, 0.0], dtype=np.float32)
        self.state_derivative = np.array([0.0, 0.0, 0.0], dtype=np.float32)
        self.current_step = 0
        self.history = [self.state.copy()]
        self.time_history = [0.0]

        # Generate new target position
        self.target_position = self._generate_target_position()

        # Include target in observation: [state, state_derivative, time, target]
        observation = np.concatenate([self.state, self.state_derivative, [self.current_step * self.dt], self.target_position])
        return observation.astype(np.float32), {}

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict[str, Any]]:
        # if isinstance(self.action_space, spaces.Box):
        #     action = np.clip(action, self.action_space.low, self.action_space.high)

        U = action.reshape(-1, 1)
        X = self.state.reshape(-1, 1)

        # U represents wheel velocities from controllers
        # F converts wheel velocities to robot velocities
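        # Forward-Euler update of the pose (comment sketch of the kinematics below):
        #   X_{k+1} = X_k + R(theta_k) @ (F @ U_k) * dt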
        robot_velocities = self.F @ U
        Xp = X + self._R(X[2, 0]) @ robot_velocities * self.dt

        previous_state = self.state.copy()
        self.state = Xp.flatten().astype(np.float32)
        self.state[2] = np.mod(self.state[2], 2*np.pi)  # Wrap theta to [0, 2π]
        self.state_derivative = ((self.state - previous_state) / self.dt).astype(np.float32)
        self.current_step += 1
        self.history.append(self.state.copy())
        self.time_history.append(self.current_step * self.dt)

        reward = self._compute_reward(action)
        terminated = self._is_terminated()
        truncated = self.current_step >= self.max_steps

        observation = np.concatenate([self.state, self.state_derivative, [self.current_step * self.dt], self.target_position])
        return observation.astype(np.float32), reward, terminated, truncated, {}

    def _compute_reward(self, action: np.ndarray) -> float:
        # Base action penalty
        base_reward = -0.01 * np.sum(action**2)

        # Check if target is reached
        position_error = np.linalg.norm(self.state[:2] - self.target_position[:2])
        orientation_error = abs(self.state[2] - self.target_position[2])

        # Handle angle wrapping for orientation error
        orientation_error = min(orientation_error, 2*np.pi - orientation_error)

        # Check if robot is both at target AND stopped
        robot_velocity = np.linalg.norm(self.state_derivative)

        # Target reached bonus (requires position, orientation AND velocity criteria)
        if (position_error < self.position_tolerance and
                orientation_error < self.orientation_tolerance and
                robot_velocity < self.velocity_tolerance):
            return base_reward + self.target_reached_reward

        return base_reward

    def _is_terminated(self) -> bool:
        return self._is_target_reached()

    def set_render_bounds(self, xlim: Tuple[float, float], ylim: Tuple[float, float]) -> None:
        """
        Set the coordinate bounds for rendering.

        Args:
            xlim: Tuple of (x_min, x_max) for x-axis limits
            ylim: Tuple of (y_min, y_max) for y-axis limits
        """
        self.render_xlim = xlim
        self.render_ylim = ylim

    def render(self):
        if self.render_mode == "human":
            self._render_human()
        elif self.render_mode == "rgb_array":
            return self._render_rgb_array()

    def _render_human(self):
        if self.h_fig is None:
            self._create_figure()

        if len(self.history) > 1 and self.h_ax is not None:
            history_array = np.array(self.history)
            time_array = np.array(self.time_history)
            current_time = self.current_step * self.dt

            # Clear all subplots
            for ax in self.h_ax:
                ax.clear()

            # Main trajectory plot (left panel)
            self.h_ax[0].plot(history_array[:, 0], history_array[:, 1], 'b-', alpha=0.7, label='Trajectory')
            self.h_ax[0].plot(self.state[0], self.state[1], 'ro', markersize=8, label='Current Position')

            # Draw target position
            self.h_ax[0].plot(self.target_position[0], self.target_position[1], 'gs', markersize=10, label='Target Position')
            target_tip_points = self._get_robot_axis_points(self.target_position)
            self.h_ax[0].plot(target_tip_points[0, :2], target_tip_points[1, :2], 'g--', linewidth=1, alpha=0.7)
            self.h_ax[0].plot(target_tip_points[0, [0, 2]], target_tip_points[1, [0, 2]], 'g--', linewidth=1, alpha=0.7)

            tip_points = self._get_robot_axis_points(self.state)
            self.h_ax[0].plot(tip_points[0, :2], tip_points[1, :2], 'r-', linewidth=2, label='X-axis')
            self.h_ax[0].plot(tip_points[0, [0, 2]], tip_points[1, [0, 2]], 'g-', linewidth=2, label='Y-axis')

            self.h_ax[0].set_xlim(*self.render_xlim)
            self.h_ax[0].set_ylim(*self.render_ylim)
            self.h_ax[0].set_aspect('equal', adjustable='box')
            self.h_ax[0].grid(True)
            self.h_ax[0].set_xlabel('x [m]')
            self.h_ax[0].set_ylabel('y [m]')
            self.h_ax[0].set_title(f'Robot Position (Time: {current_time:04.2f}s -- Step: {self.current_step:03d})')

            # Time-series plots (right panels)
            labels = ['x [m]', 'y [m]', 'θ [rad]']
            for i, (ax, label) in enumerate(zip(self.h_ax[1:], labels)):
                ax.plot(time_array, history_array[:, i], 'k-', linewidth=1)
                ax.plot(current_time, self.state[i], 'ro', markersize=6)
                ax.set_xlabel('Time [s]')
                ax.set_ylabel(label)
                ax.grid(True)
                ax.set_xlim(0, max(current_time + 1, 5))

                # Fixed y-axis ranges for time-series plots
                if i == 0:  # x position
                    ax.set_ylim(*self.render_xlim)  # Use same range as main plot
                elif i == 1:  # y position
                    ax.set_ylim(*self.render_ylim)  # Use same range as main plot
                elif i == 2:  # theta angle
                    ax.set_ylim(0, 2*np.pi)  # Fixed range for full rotation

            plt.tight_layout()
            plt.pause(self.dt)

    def _render_rgb_array(self):
        return None

    def _create_figure(self):
        self.h_fig = plt.figure(figsize=(10, 5.5))

        # Create multi-panel layout matching original
        self.h_ax = [
            self.h_fig.add_subplot(1, 2, 1),  # Main trajectory plot
            self.h_fig.add_subplot(3, 2, 2),  # x vs time
            self.h_fig.add_subplot(3, 2, 4),  # y vs time
            self.h_fig.add_subplot(3, 2, 6)   # theta vs time
        ]

        # Configure main trajectory plot
        self.h_ax[0].set_xlabel('x [m]')
        self.h_ax[0].set_ylabel('y [m]')
        self.h_ax[0].set_title('Wheeled Robot')
        self.h_ax[0].axis('equal')
        self.h_ax[0].grid(True)
        self.h_ax[0].set_xlim(*self.render_xlim)
        self.h_ax[0].set_ylim(*self.render_ylim)

        # Configure time-series plots
        labels = ['x [m]', 'y [m]', 'θ [rad]']
        for i, (ax, label) in enumerate(zip(self.h_ax[1:], labels)):
            ax.set_xlabel('Time [s]')
            ax.set_ylabel(label)
            ax.grid(True)

        plt.tight_layout()
        plt.ion()

    def _get_robot_axis_points(self, state):
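        # Returns a 2x3 array: column 0 is the robot origin, columns 1 and 2 are the
        # tips of the body x- and y-axes (length 0.7) used to draw the robot frame.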
        tip_points = np.zeros((2, 3), dtype=float)
        tip_points[:, 0] = state[:2]
        Rxy = self._R(state[2])[:2, :2]
        tip_points[:, [1]] = Rxy @ np.array([[0.7], [0]]) + tip_points[:, [0]]
        tip_points[:, [2]] = Rxy @ np.array([[0], [0.7]]) + tip_points[:, [0]]
        return tip_points

    def close(self):
        if self.h_fig is not None:
            plt.close(self.h_fig)
            self.h_fig = None
            self.h_ax = None


class OmniRobotEnv(WheeledRobotEnv):
    """
    Omnidirectional robot environment using three-wheel omnidirectional drive.

    This environment simulates a robot with three omnidirectional wheels arranged
    at 120-degree intervals. The robot can move in any direction and rotate
    simultaneously without changing orientation first.

    The action space consists of 3 wheel velocities, which are converted to
    robot motion through the kinematic transformation matrix F.

    Wheel configuration:
        - Wheel 1: 0° (front)
        - Wheel 2: 120° (rear left)
        - Wheel 3: 240° (rear right)
        - All wheels at distance l=0.5 from center
        - Wheel radius r=0.1

    Action space: Box(3,) representing wheel velocities [-100.0, 100.0]
    """

    def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05):
        super().__init__(render_mode, random_target, target_position, velocity_tolerance)

    def _define_action_space(self):
        return spaces.Box(low=-100.0, high=100.0, shape=(3,), dtype=np.float32)

    @staticmethod
    def _F():
        a1 = 0.0; b1 = 0.0; l1 = 0.5
        a2 = (2/3)*np.pi; b2 = 0.0; l2 = 0.5
        a3 = (4/3)*np.pi; b3 = 0.0; l3 = 0.5
        r = 0.1
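        # Each row of J is the rolling constraint of one omni wheel, mapping
        # robot-frame velocities [vx, vy, omega] to that wheel's speed;
        # F = J^{-1} converts commanded wheel speeds back to robot velocities.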

        J = np.array([
            [1/r*np.sin(a1+b1), -1/r*np.cos(a1+b1), -l1/r*np.cos(b1)],
            [1/r*np.sin(a2+b2), -1/r*np.cos(a2+b2), -l2/r*np.cos(b2)],
            [1/r*np.sin(a3+b3), -1/r*np.cos(a3+b3), -l3/r*np.cos(b3)]
        ])
        F = np.linalg.inv(J)

        return F


class MecanumRobotEnv(WheeledRobotEnv):
    """
    Mecanum wheel robot environment using four-wheel mecanum drive.

    This environment simulates a robot with four mecanum wheels arranged in a
    rectangular configuration. Each wheel has rollers at 45° angles, allowing
    the robot to move in any direction and rotate simultaneously.

    The action space consists of 4 wheel velocities for the mecanum wheels,
    which are converted to robot motion through the kinematic transformation
    matrix F using the Moore-Penrose pseudoinverse.

    Wheel configuration:
        - Front-left wheel: position (+0.5, +0.5), roller direction (+1, +1)/√2
        - Front-right wheel: position (-0.5, +0.5), roller direction (-1, +1)/√2
        - Rear-left wheel: position (-0.5, -0.5), roller direction (-1, -1)/√2
        - Rear-right wheel: position (+0.5, -0.5), roller direction (+1, -1)/√2
        - All wheels have radius r=0.1

    Action space: Box(4,) representing wheel velocities [-160.0, 160.0]
    """

    def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05):
        super().__init__(render_mode, random_target, target_position, velocity_tolerance)

    def _define_action_space(self):
        return spaces.Box(low=-160.0, high=160.0, shape=(4,), dtype=np.float32)

    @staticmethod
    def _F():
        l1 = np.array([+0.5, +0.5]); f1 = np.array([+1, +1])/np.sqrt(2); d1 = np.array([+1, 0])
        l2 = np.array([-0.5, +0.5]); f2 = np.array([-1, +1])/np.sqrt(2); d2 = np.array([+1, 0])
        l3 = np.array([-0.5, -0.5]); f3 = np.array([-1, -1])/np.sqrt(2); d3 = np.array([-1, 0])
        l4 = np.array([+0.5, -0.5]); f4 = np.array([+1, -1])/np.sqrt(2); d4 = np.array([-1, 0])
        r = 0.1
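        # l_i: wheel position, f_i: roller axis direction, d_i: wheel drive direction.
        # Each row of J encodes one mecanum wheel's rolling constraint on
        # [vx, vy, omega]; the Moore-Penrose pseudoinverse below recovers robot
        # velocities from the four commanded wheel speeds.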

        J11 = f1[1]/(r*d1[0]*f1[1]-r*d1[1]*f1[0])
        J21 = f2[1]/(r*d2[0]*f2[1]-r*d2[1]*f2[0])
        J31 = f3[1]/(r*d3[0]*f3[1]-r*d3[1]*f3[0])
        J41 = f4[1]/(r*d4[0]*f4[1]-r*d4[1]*f4[0])
        J12 = -f1[0]/(r*d1[0]*f1[1]-r*d1[1]*f1[0])
        J22 = -f2[0]/(r*d2[0]*f2[1]-r*d2[1]*f2[0])
        J32 = -f3[0]/(r*d3[0]*f3[1]-r*d3[1]*f3[0])
        J42 = -f4[0]/(r*d4[0]*f4[1]-r*d4[1]*f4[0])
        J13 = (-f1[1]*l1[1]-f1[0]*l1[0])/(r*d1[0]*f1[1]-r*d1[1]*f1[0])
        J23 = (-f2[1]*l2[1]-f2[0]*l2[0])/(r*d2[0]*f2[1]-r*d2[1]*f2[0])
        J33 = (-f3[1]*l3[1]-f3[0]*l3[0])/(r*d3[0]*f3[1]-r*d3[1]*f3[0])
        J43 = (-f4[1]*l4[1]-f4[0]*l4[0])/(r*d4[0]*f4[1]-r*d4[1]*f4[0])
        J = np.array([[J11, J12, J13], [J21, J22, J23], [J31, J32, J33], [J41, J42, J43]])

        F = np.linalg.pinv(J)
        return F


class TankRobotEnv(WheeledRobotEnv):
    """
    Tank-drive (differential drive) robot environment using two wheels.

    This environment simulates a robot with two independently controlled wheels
    on opposite sides. The robot moves forward/backward when both wheels rotate
    in the same direction, and turns when wheels rotate at different speeds.

    The robot cannot move sideways and must rotate to change direction.
    The action space consists of 2 wheel velocities, which are converted to
    robot motion through the kinematic transformation matrix F using the
    Moore-Penrose pseudoinverse.

    Wheel configuration:
        - Left wheel: contributes to forward motion and rotation
        - Right wheel: contributes to forward motion and rotation
        - Wheel base b=1.0 (distance between wheels)
        - Wheel radius r=0.1

    Action space: Box(2,) representing left and right wheel velocities [-100.0, 100.0]
    """

    def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05):
        super().__init__(render_mode, random_target, target_position, velocity_tolerance)

    def _define_action_space(self):
        return spaces.Box(low=-100.0, high=100.0, shape=(2,), dtype=np.float32)

    @staticmethod
    def _F():
        b = 1
        r = 0.1
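        # Differential drive: each row of J maps [vx, vy, omega] to one wheel speed,
        # with no lateral (vy) actuation; F = pinv(J) gives the least-squares map
        # from the two wheel speeds back to robot velocities.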

        J = np.array([
            [+1/r, 0, -b/(2*r)],
            [-1/r, 0, -b/(2*r)]
        ])
        F = np.linalg.pinv(J)

        return F


class AckermanRobotEnv(WheeledRobotEnv):
    """
    Ackermann steering robot environment using bicycle kinematics.

    This environment simulates a car-like robot with Ackermann steering geometry.
    The robot has a front steering wheel and rear driving wheel, similar to a
    bicycle model. It moves forward/backward and steers by changing the front
    wheel angle.

    Unlike other robot types, this class overrides the step() method to implement
    bicycle kinematics instead of using the standard F matrix approach.

    Vehicle parameters:
        - Wheel radius r=0.1
        - Uses bicycle kinematics model
        - Steering angle affects turning radius

    Action space: Box(2,) with:
        - action[0]: driving velocity [-10.0, 10.0]
        - action[1]: steering angle [-1.0, 1.0] radians

    The robot cannot move sideways and follows curved paths based on the
    steering angle and forward velocity.
    """

    def __init__(self, render_mode: Optional[str] = None, random_target: bool = True, target_position: Optional[np.ndarray] = None, velocity_tolerance: float = 0.05):
        super().__init__(render_mode, random_target, target_position, velocity_tolerance)

    def _define_action_space(self):
        return spaces.Box(low=np.array([-10.0, -1.0]), high=np.array([10.0, 1.0]), dtype=np.float32)

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict[str, Any]]:
        # if isinstance(self.action_space, spaces.Box):
        #     action = np.clip(action, self.action_space.low, self.action_space.high)

        r = 0.1
        dU = action.reshape(-1, 1)
        X = self.state.reshape(-1, 1)
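        # Body-frame velocity under this simplified car-like model: forward speed is
        # r * action[0]; the heading rate is the forward speed divided by action[1]
        # (so the steering command acts like a turning-radius-style input), and a
        # zero steering command keeps the heading fixed.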

        if dU[1, 0] != 0:
            dX = np.array([[r*dU[0, 0]], [0], [r*dU[0, 0]/dU[1, 0]]])
        else:
            dX = np.array([[r*dU[0, 0]], [0], [0]])

        Xp = X + self._R(X[2, 0]) @ dX * self.dt

        previous_state = self.state.copy()
        self.state = Xp.flatten().astype(np.float32)
        self.state[2] = np.mod(self.state[2], 2*np.pi)  # Wrap theta to [0, 2π]
        self.state_derivative = ((self.state - previous_state) / self.dt).astype(np.float32)
        self.current_step += 1
        self.history.append(self.state.copy())
        self.time_history.append(self.current_step * self.dt)

        reward = self._compute_reward(action)
        terminated = self._is_terminated()
        truncated = self.current_step >= self.max_steps

        observation = np.concatenate([self.state, self.state_derivative, [self.current_step * self.dt], self.target_position])
        return observation.astype(np.float32), reward, terminated, truncated, {}
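

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only): drive an OmniRobotEnv with random
    # actions and watch the live plot. Assumes an interactive matplotlib backend
    # is available for render_mode="human".
    env = OmniRobotEnv(render_mode="human", random_target=True)
    obs, info = env.reset(seed=0)
    for _ in range(100):
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        env.render()
        if terminated or truncated:
            break
    env.close()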