lawarob-wheeled/Sol9_tank_l1.py

#!/usr/bin/env python

import numpy as np
from WheeledRobot import TankRobotEnv

# TankRobot kinematics
b = 1
r = 0.1

# Wheel equations
J = np.array([
    [+1/r, 0, -b/(2*r)],
    [-1/r, 0, -b/(2*r)]
])
F = np.linalg.pinv(J)

def controller_tank_l1(t, X_I, dX_I, target_position):
    """L1 adaptive control with path planning and dynamic target"""

    # Controller parameters
    dx_R_max = 3
    L0 = 4
    L1 = 4

    # Points & vectors
    A = np.array([[X_I[0,0]],[X_I[1,0]]])
    B = target_position[:2].reshape(-1, 1)  # Use dynamic target (x, y only)
    # d = np.array([[1],[1]])
    # d = np.array([[np.cos(X_I[2,0])],[np.sin(X_I[2,0])]])
    d = np.array([[-1],[0]])
    d = d / np.linalg.norm(d) # normalize
    C = (np.transpose(A-B) @ d) * d + B

    # L0 controller
    D = C + L0 * d

    # L1 controller
    # D = C + np.sqrt( L1**2 - np.linalg.norm(C-A)**2 ) * d

    # Turning radius
    eta = np.arctan2( (D-A)[1,0], (D-A)[0,0] ) - X_I[2,0]
    omega = 2*np.sin(eta)*dx_R_max/np.linalg.norm(D-A)
    dX_R_des = np.array([[dx_R_max], [0], [omega]])

    # Stopping condition
    if np.linalg.norm(A-B) < 0.2:
        dX_R_des = np.array([[0], [0], [0]])

    # Calculate control input
    U = J @ dX_R_des
    return U

def run_simulation():
    """Run simulation using Gymnasium environment with L1 adaptive control to dynamic target"""

    # Initialize environment with fixed target for reproducible results
    # You can set random_target=True for random target generation
    env = TankRobotEnv(render_mode="human", random_target=False)
    observation, _ = env.reset()

    # Expand render bounds to show target position (default target is at [10,5])
    env.set_render_bounds((-2, 12), (-2, 8))

    print("Starting Tank Robot L1 Adaptive Control Simulation")
    print("Controller: L1 adaptive control with path planning to dynamic target")
    print(f"Target position: [{observation[7]:.2f}, {observation[8]:.2f}, {observation[9]:.2f}]")

    for step in range(10000):
        # Extract controller inputs from observation
        # New observation format: [x, y, theta, dx, dy, dtheta, time, target_x, target_y, target_theta]
        time = observation[6]                    # Current time
        X_I = observation[:3].reshape(-1, 1)     # State [x, y, theta]
        dX_I = observation[3:6].reshape(-1, 1)   # Derivatives [dx, dy, dtheta]
        target_position = observation[7:10]      # Target [target_x, target_y, target_theta]

        # Call L1 adaptive controller with dynamic target
        U = controller_tank_l1(time, X_I, dX_I, target_position)

        print(X_I)

        # Step environment
        observation, reward, terminated, truncated, _ = env.step(U.flatten())

        # Render the environment
        env.render()

        # Check if target reached
        if terminated:
            print(f"Target reached at step {step}! Reward: {reward:.2f}")
            break
        elif truncated:
            print(f"Maximum steps reached at step {step}")
            break

    input("Press Enter to close the simulation window...")
    env.close()
    print("Simulation completed")

if __name__ == "__main__":
    run_simulation()