\documentclass{beamer}

% ---------------------------------------------------------------------------
% Theme and color
% ---------------------------------------------------------------------------
\usetheme{Madrid}
\usecolortheme{default}

% ---------------------------------------------------------------------------
% Packages
% NOTE: beamer preloads hyperref internally, so it is loaded here without
% options to avoid an option clash; configure it via \hypersetup if needed.
% ---------------------------------------------------------------------------
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{amsmath, amssymb, amsfonts}
\usepackage{booktabs}
\usepackage{graphicx}
\usepackage{bm}        % bold math symbols (\bm handles Greek/italic correctly)
\usepackage{hyperref}

% ---------------------------------------------------------------------------
% Custom commands (kept consistent with the paper source)
% ---------------------------------------------------------------------------
\newcommand*{\KL}{D_{\mathrm{KL}}}          % KL-divergence symbol
% \newcommand* instead of \def: errors loudly if \figref is already defined,
% and the starred form gives better error localization for short macros.
\newcommand*{\figref}[1]{Figure~\ref{#1}}

% ---------------------------------------------------------------------------
% Title-page metadata
% ---------------------------------------------------------------------------
\title[Meta-Safe RL]{A CMDP-within-online framework for Meta-Safe Reinforcement Learning}
\author{Vanshaj Khattar\inst{1} \and Yuhao Ding\inst{2} \and Bilgehan Sel\inst{1} \and Javad Lavaei\inst{2} \and Ming Jin\inst{1}}
\institute[VT \& UCB]{
  \inst{1} Virginia Tech \\
  \inst{2} UC Berkeley
}
\date{\today}

\setbeamerfont{caption}{size=\scriptsize}

\begin{document}

\begin{frame}{Experimental Results: MuJoCo Environments}
  \centering
  \textbf{Half-Cheetah (Low Task-Similarity)}
  \begin{figure}
    % Two plots typeset side by side (0.4\textwidth each, no \\ between
    % them); file extensions omitted so graphicx picks the best format.
    \includegraphics[width=0.4\textwidth]{HalfCheetah/HalfCheetahReward_low_task_similarity_broken_axis}
    \includegraphics[width=0.4\textwidth]{HalfCheetah/HalfCheetahCost_low_task_similarity}
    % Caption wording fixed: the images sit left/right, not top/bottom.
    \caption{Reward (left) and constraint violation (right) for Half-Cheetah.
      Our method (Meta-SRL) learns a high-reward policy while keeping the
      constraint violation below the threshold (blue line).}
    \label{fig:halfcheetah}
  \end{figure}
\end{frame}

\end{document}