Automatic Control Knowledge Repository


Details for: "reinforcement learning for cartpole system"

Name: reinforcement learning for cartpole system (Key: P5MZW)
Path: ackrep_data/problem_solutions/PPO_cartpole_system View on GitHub
Type: problem_solution
Short Description:
Created: 2023-07-07
Compatible Environment: default_conda_environment (Key: CDAMA)
Source Code: solution.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
LQR controller design consists of 4 steps:
1. linearize the non-linear system around the equilibrium point.
2. specify weigh matrices
3. calculate state feedback
4. check whether the system have the desired behavior
"""
import matplotlib.pyplot as plt
import numpy as np
import os
from ackrep_core.system_model_management import save_plot_in_dir
from stable_baselines3 import PPO


class SolutionData:
    pass


def solve(problem_spec, kwargs=None):

    env = problem_spec.env

    # set up a PPO agent (stable-baselines3) for the cartpole environment
    model = PPO(policy="MlpPolicy", env=env)

    # train the agent for the given number of timesteps
    model.learn(30)

    # save the trained model in the _data directory next to this file
    path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "_data", "model.h5")
    print(path)
    model.save(path)

    # roll out the trained policy and record the observations
    env.render_mode = "human"
    obs_list = []
    obs, _ = env.reset()
    done = trunc = False
    for i in range(300):
        obs_list.append(obs)
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, trunc, info = env.step(action)
        if done or trunc:
            obs, _ = env.reset()

    solution_data = SolutionData()
    solution_data.xx = np.array(env.state)  # final state of the system
    solution_data.res = np.array(obs_list)  # recorded observations of the rollout

    save_plot(problem_spec, solution_data)

    return solution_data


def save_plot(problem_spec, solution_data):
    titles = ["x1", "x2", "x1_dot", "x2_dot"]
    # plot each recorded observation component of the rollout
    plt.figure(1)
    for i in range(4):
        plt.subplot(2, 2, i + 1)
        plt.plot(np.arange(solution_data.res.shape[0]), solution_data.res[:, i], color="k", linewidth=1)
        plt.grid(1)
        plt.title(titles[i])
        plt.xlabel("time t/s")
        if i == 0:
            plt.ylabel("position [m]")
        elif i == 1:
            plt.ylabel("angular position [rad]")
        elif i == 2:
            plt.ylabel("velocity [m/s]")
        else:
            plt.ylabel("angular velocity [rad/s]")
    plt.tight_layout()
    save_plot_in_dir("plot.png")


test.py
import os
import numpy as np

from stable_baselines3 import PPO
import gymnasium as gym

from ipydex import IPS

# create the cartpole environment without rendering for training
env = gym.make('CartPole-v1', render_mode=None)

IPS()  # interactive shell for inspection (ipydex)
# train a PPO agent on the environment
model = PPO(policy="MlpPolicy", env=env)

model.learn(30000)

# save the trained model in the _data directory next to this script
path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "_data", "model.h5")
print(path)
model.save(path)

# recreate the environment with human rendering and roll out the trained policy
env = gym.make('CartPole-v1', render_mode="human")
obs_list = []
obs, _ = env.reset()
done = trunc = False
for i in range(300):
    obs_list.append(obs)
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, trunc, info = env.step(action)
    if done or trunc:
        obs, _ = env.reset()

IPS()
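
test.py does not quantify the trained agent's performance. Continuing from the script above (reusing its model and its gymnasium import), a minimal sketch of how the policy could be scored with stable-baselines3's evaluate_policy helper; the number of evaluation episodes is an arbitrary choice here.

# sketch (not part of test.py): average episode return of the trained agent
from stable_baselines3.common.evaluation import evaluate_policy

eval_env = gym.make('CartPole-v1', render_mode=None)
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
print(f"mean return over 10 episodes: {mean_reward:.1f} +/- {std_reward:.1f}")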

Solved Problems: reinforcement learning for cartpole system
Used Methods:
Result: Unknown. (Entity was not included in any CI Job.)
Runtime: (estimated: 10s)