From d8a818331d1f4bac8bd10f715c225d0d4bdd4953 Mon Sep 17 00:00:00 2001
From: "A380-N-9025E0\\Tilk" <christian.tilk@univie.ac.at>
Date: Mon, 7 Apr 2025 13:13:46 +0200
Subject: [PATCH] final version

---
 Main.tex | 222 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 125 insertions(+), 97 deletions(-)

diff --git a/Main.tex b/Main.tex
index 129ed19..ce76de2 100644
--- a/Main.tex
+++ b/Main.tex
@@ -204,13 +204,13 @@
 \frame{
 \frametitle{Supervised Learning}
 \begin{columns}[T]
- \begin{column}{0.68\textwidth}
+ \begin{column}{0.6\textwidth}
 \phantom{a}\\\medskip
 \tikzstyle TestEdge=[-Triangle,line width=3pt,shorten >=3pt,green!40!black,dotted]
 \tikzstyle TrainEdge=[-Triangle,thick,shorten >=3pt,line width=3pt]
 \tikzset{ Legend/.style={draw=white, top color=white, bottom color=white, inner sep=0.5em, minimum height=1cm,text width=10mm, align = left}}
-\scalebox{0.7}{
+\scalebox{0.72}{
 \begin{tikzpicture}[->, node distance=0.8cm, auto]
 \small
 \node[Legend] (Inst) {};%training data points $i=1 \dots n$
@@ -219,8 +219,8 @@
 \node[mynode,below=1.5cm of Inst,align=center] (ML) {ML-Model };
 \node[mynode,below=4cm of Inst,align=center] (Pred) {Trained ML-Model};
-\node[mynode,below left =2 cm and 0.5cm of Pred] (TestFeat) {Input $\hat{X}_1,, \dots, \hat{X}_m$};
-\node[mynode,below right=2 cm and 0.5cm of Pred] (OutTest) {Output $Y^*_1,\dots, Y^*_m$};
+\node[mynode,below left =2 cm and 0.5cm of Pred] (TestFeat) {Input $\hat{X}$};
+\node[mynode,below right=2 cm and 0.5cm of Pred] (OutTest) {Output $\hat{Y}$};
 \only<2>{
 \node[mynodeB,below left=-1cm and 0.5cm of Inst] (Feat) {\textbf{Input $X_1, \dots, X_n$}};%feature vectors
@@ -228,12 +228,13 @@
 }
 \only<3>{
 \node[mynodeB,below=1.5cm of Inst,align=center] (ML) {\textbf{ML-Model} };
+\node[mynodeB,below=4cm of Inst,align=center] (Pred) {\textbf{Trained ML-Model}};
 }
 \only<4>{
-\node[mynodeB,below left =2 cm and 0.5cm of Pred] (TestFeat) {Input $\hat{X}_1,, \dots, \hat{X}_m$};
+\node[mynodeB,below left =2 cm and 0.5cm of Pred] (TestFeat) {\textbf{Input $\hat{X}$}};
 }
 \only<5>{
-\node[mynodeB,below right=2 cm and 0.5cm of Pred] (OutTest) {Output $Y^*_1,\dots, Y^*_m$};
+\node[mynodeB,below right=2 cm and 0.5cm of Pred] (OutTest) {\textbf{Output $\hat{Y}$}};
 }
 \path
 %(TestInst) edge[TestEdge] (TestFeat)
@@ -248,21 +249,21 @@
 (Out) edge[TrainEdge] node {}(ML)
 (ML) edge[TrainEdge] node {}(Pred)
 ;
-\node[Legend, below left = 1.5cm and 1.5cm of Inst] (E1) {Training};
-\node[Legend, below left= 2.5cm and 1.5cm of Inst] (E2) {Test};
-\node[Legend, left = 1cm of E1] (S1) {};
-\node[Legend, left = 1cm of E2] (S2) {};
-
-\path
-(S1) edge[TrainEdge] (E1)
-(S2) edge[TestEdge] (E2);
+%\node[Legend, below left = 1.5cm and 1.5cm of Inst] (E1) {Training};
+%\node[Legend, below left= 2.5cm and 1.5cm of Inst] (E2) {Test};
+%\node[Legend, left = 1cm of E1] (S1) {};
+%\node[Legend, left = 1cm of E2] (S2) {};
+
+%\path
+%(S1) edge[TrainEdge] (E1)
+%(S2) edge[TestEdge] (E2);
 \end{tikzpicture}
 }
 \end{column}
- \begin{column}{0.3\textwidth}
+ \begin{column}{0.38\textwidth}
 \begin{overprint}
 \only<2>{
- \includegraphics[width=\textwidth]{figure/catsdog2}}
+ \includegraphics[width=0.9\textwidth]{figure/catsdog2}}
 \only<3>{
 \includegraphics[width=\textwidth]{figure/GNN}
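The slide above is the generic fit/predict pipeline: training pairs (X, Y) produce a trained model, which then maps unseen inputs to predictions. A minimal sketch of the same flow; library, model, and data are illustrative assumptions, not taken from the talk:

# Training: inputs X_1..X_n with known outputs Y_1..Y_n fit the "ML-Model".
from sklearn.ensemble import RandomForestClassifier

X = [[0, 0], [0, 1], [1, 0], [1, 1]]   # toy feature vectors
Y = [0, 1, 1, 0]                       # toy labels
model = RandomForestClassifier(n_estimators=50).fit(X, Y)  # "Trained ML-Model"

# Test: unseen inputs X_hat are mapped to predicted outputs Y_hat.
X_hat = [[1, 0], [0, 0]]
Y_hat = model.predict(X_hat)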
@@ -297,20 +298,20 @@ \scriptsize
 \node[mynode] at (2,10.5) (1) {Problem};
-\node[mynode2] at (2,9) (2) {OR-expert};
+\node[mynode] at (2,9) (2) {Formalisation};
 \node[mynode] at (2,7.5) (3a) {Mathematical \\ model};
 \node[mynode] at (2,6) (5a) {Algorithm};
-\visible<4->{
-\node[mynodeB] at (0.5,4) (5b) {\textbf{\normalsize{Data set}}};
+\visible<5->{
+\node[mynodeB] at (0.5,4) (5b) {\textbf{\normalsize{Instance}} \\{\scriptsize(specific data set)}};
 \node[mynodeB] at (3.5,4) (6) {\textbf{\normalsize{Solution}}};
 }
 \draw[edge] (1) -> (2);
 \draw[edge] (2) -> (3a);
 \draw[edge] (3a) -> (5a);
-\visible<4->{
+\visible<5->{
 \draw[edge] (5a) -> (6);
 \draw[edge] (5b) -> (5a);
 }
@@ -319,10 +320,14 @@
 \onslide<1| trans:1>{
 \node[mynodeB] at (2,10.5) (a) {\normalsize{ \textbf{Problem}}};
 }
-\visible<2| trans:2>{
+\onslide<2| trans:2>{
+\node[mynodeB] at (2,9) (2) {\normalsize{ \textbf{Formalisation}}};
+}
+
+\visible<3| trans:3>{
 \node[mynodeB] at (2,7.5) (b) { \textbf{\normalsize{Mathematical}} \\ \textbf{\normalsize{model}}};
 }
-\onslide<3| trans:3>{
+\onslide<4| trans:4>{
 \node[mynodeB] at (2,6) (c) {\normalsize{\textbf{Algorithm}} };
 }
 %\onslide<4| trans:4>{
@@ -343,14 +348,19 @@
 Problem:\hfill\mbox{}\\[2ex]
 \includegraphics[width=.99\textwidth]{figure/ZIMPL1}
 \end{bspBlock}
+ \onslide<2| trans:2>
+ \begin{bspBlock}{(Shortest path problem)}
+ Problem:\hfill\mbox{}\\[2ex]
+ \includegraphics[width=.99\textwidth]{figure/ZIMPL0}
+ \end{bspBlock}
 %
-\onslide<2| trans:2>
+\onslide<3| trans:3>
 \begin{bspBlock}{(Shortest path problem)}
 General mathematical model:\hfill\mbox{}\\[2ex]
 \includegraphics[width=.99\textwidth]{figure/ZIMPL2}
 \end{bspBlock}
 %
-\onslide<3| trans:3>
+\onslide<4| trans:4>
 \begin{bspBlock}{(Dijkstra's Algorithm)}
 \tiny
 \begin{algorithm}[H]
@@ -388,9 +398,9 @@ %%
 %
 %%
-\onslide<4| trans:4 >
+\onslide<5| trans:5 >
 \begin{bspBlock}{(Solution)}
-  \includegraphics[width=\textwidth]{figure/ZIMPL9b}
+  \includegraphics[width=\textwidth]{figure/spp_}
 \end{bspBlock}
 
 \end{overprint}
 }
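The worked example on these slides runs Dijkstra's algorithm on a small shortest-path instance. A compact sketch of the labeling scheme; the graph literal is made up for illustration, not the ZIMPL instance from the figures:

import heapq

def dijkstra(graph, s, t):
    """Cheapest s-t path in a digraph given as {node: [(succ, cost >= 0), ...]}."""
    dist, pred = {s: 0.0}, {}
    heap = [(0.0, s)]
    while heap:
        d, i = heapq.heappop(heap)
        if i == t:
            break                        # t is settled: done
        if d > dist[i]:
            continue                     # stale heap entry
        for j, c in graph.get(i, []):
            if d + c < dist.get(j, float("inf")):
                dist[j], pred[j] = d + c, i
                heapq.heappush(heap, (d + c, j))
    path = [t]
    while path[-1] != s:
        path.append(pred[path[-1]])
    return dist[t], path[::-1]

graph = {"s": [("a", 2), ("b", 5)], "a": [("b", 1), ("t", 6)], "b": [("t", 2)]}
print(dijkstra(graph, "s", "t"))         # (5.0, ['s', 'a', 'b', 't'])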
@@ -400,19 +410,19 @@
 
 \frame{
-\frametitle{Machine Learning and Operations Research}
+\frametitle{Machine Learning (ML) and Operations Research (OR)}
 \Large
 \textbf{Key Question:}\\
 \begin{center}
-\red{How can ML be used for solving optimisation problems?}\\[7ex]\pause
+\red{How can ML be used for solving optimization problems?}\\[7ex]\pause
 
-Combinatorial optimisation problems are quite different from most problems currently solved by ML \citep{BengioEtAl2021}
+Combinatorial optimization problems are quite different from most problems currently solved by ML \citep{BengioEtAl2021}
 \end{center}
 }
 
 \section{Vehicle Routing Problems}
 \frame{
-\frametitle{The Family of Vehicle Routing Problems}
+\frametitle{Example: Family of Vehicle Routing Problems}
 \onslide<1->{
 \scalebox{0.95}{\alt<1|trans:0>{\input{figure/BaseVRP_Prob}}{\input{figure/BaseVRP_Routes}}}
 \hspace{0.15cm}
@@ -455,7 +465,7 @@ Combinatorial optimisation problems are quite different from most problems curre
 
 \frame{
-\frametitle{The Family of Vehicle Routing Problems}\small
+\frametitle{Family of Vehicle Routing Problems}\small
 The family of \textbf{Vehicle Routing Problems (VRPs)} forms one of the most important and most widely studied problems in logistics and combinatorial optimization.
 They are
 \begin{itemize}\small
 \item highly \blue{relevant in practice}
@@ -498,16 +508,23 @@ z_{3I-MTZ} &=& \min \sum_{k\in K} \sum_{(i,j) \in A} c_{ij}x^k_{ij}\label{mod:3I
 \frame{
 \frametitle{Learning Solutions directly?}
 \footnotesize
+
 Two questions regarding a solution must be answered:\\
-Feasibility (often difficult to evaluate) and Quality (Solution value)
+\blue{Feasibility} (often difficult to evaluate) and \blue{Quality} (solution value)
 \pause
 \newcommand{\feasiblecolor}{green!20!white}
 \begin{columns}
-\begin{column}{0.15\textwidth}
-Example:
+\begin{column}{0.25\textwidth}
+Example:\\[1ex]
+\visible<4->{
+\green{Optimal Solution}\\[1ex]
+}
+\visible<5->{
+\red{Learned Solution}\\ \red{(Infeasible)}
+}
 \end{column}
-\begin{column}{0.85\textwidth}
+\begin{column}{0.7\textwidth}
 \begin{tikzpicture}
 
 \node[fill=gray!50,circle,minimum size=5pt,inner sep=0pt] at (4,0) {};
@@ -557,40 +574,45 @@ Example:
 \draw[-Latex,thick] (0,0) -- node[pos=1.03] {$x_1$} (0,4);
 \draw[-Latex,thick] (0,0) -- node[pos=1.03] {$x_2$} (5,0);
 
-\node[fill=red,circle,minimum size=5pt,inner sep=0pt] at (0,0) {};
-\node[fill=red,circle,minimum size=5pt,inner sep=0pt] at (1,0) {};
-\node[fill=red,circle,minimum size=5pt,inner sep=0pt] at (2,0) {};
-\node[fill=red,circle,minimum size=5pt,inner sep=0pt] at (3,0) {};
-\node[fill=red,circle,minimum size=5pt,inner sep=0pt] at (0,1) {};
-\node[fill=red,circle,minimum size=5pt,inner sep=0pt] at (1,1) {};
-\node[fill=red,circle,minimum size=5pt,inner sep=0pt] at (2,1) {};
+\node[fill=blue,circle,minimum size=5pt,inner sep=0pt] at (0,0) {};
+\node[fill=blue,circle,minimum size=5pt,inner sep=0pt] at (1,0) {};
+\node[fill=blue,circle,minimum size=5pt,inner sep=0pt] at (2,0) {};
+\node[fill=blue,circle,minimum size=5pt,inner sep=0pt] at (3,0) {};
+\node[fill=blue,circle,minimum size=5pt,inner sep=0pt] at (0,1) {};
+\node[fill=blue,circle,minimum size=5pt,inner sep=0pt] at (1,1) {};
+\node[fill=blue,circle,minimum size=5pt,inner sep=0pt] at (2,1) {};
 }
-
 \only<4->{
-\node[fill=blue,circle,minimum size=5pt,inner sep=0pt] at (2,2) {};
+\node[fill=green,circle,minimum size=5pt,inner sep=0pt] at (3,0) {};
+}
+
+\only<5->{
+\node[fill=red,circle,minimum size=5pt,inner sep=0pt] at (3,1) {};
 }
 \end{tikzpicture}
 \end{column}
 \end{columns}
-\pause\pause
-Good/optimal solutions are often located in the border region.
-\red{$\Rightarrow$}Learning solutions for highly constrained problems seems not promising:
+\pause\pause\phantom{a}\\[0.5ex]
+Good/optimal solutions are often located in the border region.\\[0.5ex]
+\pause
+\red{$\Rightarrow$} Learning solutions for highly constrained problems does not seem promising:\\[0.5ex]
 \begin{itemize}
-\item Guaranteeing that the learned solution is feasible is rarely possible
-\item No guarantees in terms of solution quality can be given
+\item Guaranteeing feasibility is rarely possible
+\item No guarantees in terms of solution quality
 \end{itemize}
 }
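The figure's argument can be restated operationally: a learned point near the optimum may still violate a constraint, so feasibility has to be checked explicitly. A sketch with invented constraint data chosen to mirror the picture (the optimal point passes, the learned one fails):

A = [[1, 1], [2, 1]]      # rows of "Ax <= b"; illustrative numbers only
b = [4, 6]

def is_feasible(x, A, b):
    """Check non-negativity, integrality and every row of Ax <= b."""
    if any(v < 0 or v != int(v) for v in x):
        return False
    return all(sum(a * v for a, v in zip(row, x)) <= rhs
               for row, rhs in zip(A, b))

print(is_feasible([3, 0], A, b))   # True  -- the optimal solution
print(is_feasible([3, 1], A, b))   # False -- the learned solution: 2*3 + 1 > 6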
 \frame{
 \frametitle{Combine ML with OR Algorithms!}
 \small
- Sometimes expert knowledge is not satisfactory and algorithmic decisions are taken greedily or according to ``best practice''.\\\bigskip\pause
+ Status quo in OR algorithms:\\
+ Sometimes expert knowledge is not satisfactory and \blue{algorithmic decisions} are taken \blue{greedily} or according to ``\blue{best practice}''.\\\bigskip\pause
 
 {\large
-\blue{$\Rightarrow:$ Apply learning inside OR Algorithms} \citep{BengioEtAl2021}:\medskip}\pause
+\blue{$\Rightarrow$ Apply learning inside OR Algorithms}:\medskip}\pause
 
 \begin{itemize}
 %\item End to end learning (only for little constraint problems like TSP)
@@ -598,8 +620,10 @@
 \item Machine learning alongside optimization algorithms (recurring decisions)
 \end{itemize}
 \bigskip\pause
-Most ML Applications in OR focus on heuristics!
-
+\begin{center}
+{\large
+\green{Most ML applications in OR focus on heuristics!}\\}\citep{BengioEtAl2021}
+\end{center}
 }
@@ -609,7 +633,7 @@ Most ML Applications in OR focus on heuristics!
 \includegraphics[scale=0.25]{figure/fwflogo}
 \phantom{a}\\\bigskip\phantom{a}\\
 {\Large
-Using supervised Learning in Branch-Price-and-Cut for Vehicle Routing Problems
+Using Supervised Learning in Branch-Price-and-Cut for Vehicle Routing Problems
 }
 \end{center}
 \begin{columns}
@@ -669,8 +693,9 @@ z_{3I-MTZ} &=& \min \sum_{k\in K} \sum_{(i,j) \in A} c_{ij}x^k_{ij}\label{mod:3I
 \frametitle{Dantzig-Wolfe Reformulation}\small
 \begin{columns}[T]
 \begin{column}{0.42\textwidth}
+\centering
 %\footnotesize
-\mygreen{Aggregated problem formu-\\lation with routing variables:}\\[0.5ex]
+\mygreen{Aggregated problem formu-\\lation with routing variables:}\\[1ex]
 \scriptsize
 \blue{$\lambda_{r}$}: Binary variable with $\lambda_{r} = 1$ iff a \blue{vehicle travels the route} $r\in \Omega$
 \begin{eqnarray*}\setcounter{equation}{2}
@@ -681,11 +706,13 @@ z_{3I-MTZ} &=& \min \sum_{k\in K} \sum_{(i,j) \in A} c_{ij}x^k_{ij}\label{mod:3I
 \end{eqnarray*}
 \footnotesize
 \medskip
-$\cDTo$ \textbf{\red{Set-Partitioning~formulation}}
+Set-Partitioning~formulation
+\red{with exponentially many variables}
 \end{column}
 \begin{column}{0.64\textwidth}
-\visible<3->{\hspace*{-3ex}
-\hspace*{2ex}\green{Definition of routing variables:}
+\centering
+\visible<3->{
+\green{Definition of routing variables:}
 \[
 \Omega = \left\{ r = (y_i, x_{ij})\right\}\text{, with}
 \]
@@ -704,7 +731,8 @@ $\cDTo$ \textbf{\red{Set-Partitioning~formulation}}
 \end{column}
 \end{columns}
 \visible<4->{
- Model can \blue{\underline{not} be fully formulated and solved} using standard software $\Rightarrow$ Solution via \blue{branch-and-price-and cut} }
+\centering
+ Model can \red{\underline{not} be fully formulated and solved} using standard software\\ $\Rightarrow$ Solution via \blue{Branch-Price-and-Cut} }
 \end{frame}
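Why the exponentially large model is still usable: columns are priced on demand. The following is standard column-generation bookkeeping written to match the slide's $\lambda_r$ notation, a sketch rather than the talk's exact master problem; $\Omega'$ is the set of routes generated so far, $\pi_i$ the duals of the partitioning constraints, and $a_{ir}$ counts the visits of route $r$ to customer $i$ (any fleet-size row is omitted):

\begin{align*}
  z_{\mathrm{RMP}} = \min \sum_{r \in \Omega'} c_r \lambda_r
  \quad \text{s.t.} \quad \sum_{r \in \Omega'} a_{ir}\lambda_r = 1 \;\; (\pi_i)
  \;\;\forall i, \qquad \lambda_r \ge 0
\end{align*}
The pricing subproblem then searches for a route $r \in \Omega$ with negative reduced cost
\begin{align*}
  \tilde{c}_r = c_r - \sum_i a_{ir}\pi_i < 0,
\end{align*}
and column generation stops with an optimal LP solution once $\min_{r\in\Omega}\tilde{c}_r \ge 0$.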
 \begin{frame}
@@ -818,8 +846,8 @@ $\cDTo$ \textbf{\red{Set-Partitioning~formulation}}
 \small
 Where can ML help?\\\pause
 \begin{itemize}[<+->]
-\item Branching and Cutting decision \citep{FurianEtAl2021}
-\item Column Selection \citep{MorabitEtAl2021}
+\item Branching decisions \citep{FurianEtAl2021}
+\item Column selection \citep{MorabitEtAl2021}
 \item Help solving the subproblem \citep{MorabitEtAl2023}
 \item \green{Deciding on a relaxation for the subproblem}
 \item \dots
@@ -896,10 +924,10 @@ Find $s$-$t$-path with \blue{minimum cost} such that\\
 \bigskip\pause
 \textbf{SPPRC with $ng$-paths \citep{BaldacciEtAl2011}:}
 \begin{itemize}
- \item \blue{Idea:} prohibit/allow certain cycles to handle trade-off between strength of the relaxation and difficulty of the subproblem
- \item A \blue{neighborhood} $N_i\subset V\setminus\{s,t\}$ is associated with each node $i\in V$, e.g.~$i$ and its nearest neighbors
- \item A \blue{cycle with a node $i$} is allowed only if the circle contains a node $j$ with $i\notin N_j$
- \item \blue{Special cases of SPPRC with $ng$-routes}:\\
+ \item \blue{Idea:} \blue{allow only certain cycles}\\ (strength of relaxation vs difficulty of subproblem)
+ \item A \blue{neighborhood} $N_i\subset V\setminus\{s,t\}$ is associated with each node $i\in V$ %, e.g.~$i$ and its nearest neighbors
+ \item \blue{Cycle with node $i$} is only allowed if the cycle contains $j$ with $i\notin N_j$
+ \item \blue{Special cases} of SPPRC with $ng$-paths:\\
 $\cTo$ $N_i = N$ for all $i$ $\Rightarrow$ \blue{ESPPRC}\\
 $\cTo$ $N_i = \{i\}$ for all $i$ $\Rightarrow$ \blue{SPPRC}\\
 \end{itemize}
@@ -916,12 +944,12 @@
 \frame{
 \frametitle{Learning $ng$-path relaxation}\small
-Size of neighborhoods and selection of neighbors have a huge impact on the overall algorithmic performance!\\\smallskip
-Usually: Policy-based neighborhoods (e.g., choose the $x$ closed nodes)\\\bigskip\pause
+Size of \blue{neighborhoods} and selection of neighbors \blue{have a huge impact} on the overall algorithmic performance!\\\medskip\pause
+Status quo: \blue{Policy-based} neighborhoods\\(e.g., choose the $x$ closest nodes)\\\bigskip\pause\medskip
 %
-What is a \blue{good neighborhood}?\\\smallskip\pause
-A \blue{smallest} neighborhood leading to an \blue{elementary solution} of the LP-Relaxation\\\bigskip\pause
-How can we find such a neighborhood?\\\smallskip\pause \blue{Dynamic Neighborhood Extension} \citep[DNE, see][]{BodeIrnich2015,RobertiMingozzi2014}
+What is a \blue{good neighborhood}?\\\medskip\pause
+A \blue{smallest} neighborhood leading to an \blue{elementary solution} of the LP-Relaxation\\\bigskip\pause\medskip
+How can we find such a neighborhood?\\\medskip\pause \blue{Dynamic Neighborhood Extension} \citep[DNE, see][]{BodeIrnich2015,RobertiMingozzi2014}
 }
 
 \frame{
@@ -933,12 +961,12 @@ How can we find such a neighborhood?\\\smallskip\pause \blue{Dynamic Neighborhoo
 \begin{enumerate}
 \end{enumerate}
 \medskip\pause
 \begin{itemize}[<+->]
- \item[\green{$+$}] The neighborhood obtained at the end is a smallest that achieves the so-called elementary lower bound
+ \item[\green{$+$}] The neighborhood obtained at the end is (similar to) a smallest one that achieves the so-called elementary lower bound
 \item[\red{$-$}] Quite time consuming (Full CG-algorithm in each iteration)
 \end{itemize}
 \begin{center}
 \large
-\medskip\pause \green{Use supervised learning to get such a neighborhood a priori?}
+\medskip\pause \green{Use supervised learning to get such a neighborhood a priori}
 \end{center}
 }
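A sketch of the DNE loop under simplifying assumptions: solve_cg stands in for a full column-generation run returning the routes used by the LP solution, and each cycle through a node i is broken by adding i to the neighborhoods of the nodes between its two visits, so labels along the cycle keep "remembering" i:

def first_cycle(route):
    """Return (i, nodes strictly between two visits of i), or None."""
    seen = {}
    for pos, i in enumerate(route):
        if i in seen:
            return i, route[seen[i] + 1:pos]
        seen[i] = pos
    return None

def dynamic_neighborhood_extension(instance, N, solve_cg):
    """Grow ng-neighborhoods N[j] until the CG relaxation is elementary."""
    while True:
        routes = solve_cg(instance, N)      # full CG run -- the costly step
        cycles = [c for r in routes if (c := first_cycle(r)) is not None]
        if not cycles:
            return N                        # elementary lower bound reached
        for i, between in cycles:
            for j in between:
                N[j].add(i)                 # j may no longer "forget" i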
 \begin{frame}{Examples for Neighborhoods obtained by DNE}
 
 \end{frame}
 
 \section{First results}
-\begin{frame}{Generating data}
+\begin{frame}{Computational Results -- Generating Data}
 \small
 \begin{itemize}
 \item Over 20,000 VRPTW-instances with 100 customers each are generated
@@ -979,7 +1007,7 @@
 \item Duration of cycle $i-j-i$ %\red{$b_j-(a_i+t_{ij})$}
 \end{itemize}\pause
 %\item On top of that, also Graph Encoding variables of the kNN-induced graph can be considered. \red{k=20}
-
+ \medskip
 \item The neighborhood obtained with DNE is used as the target for classification tasks.
 \end{itemize}
 \end{frame}
@@ -995,8 +1023,8 @@
 Models used to perform the classification task:\\\medskip
 \begin{itemize}
 \item[\modA] Deep classification head of GNN encoded variables
- \item[\modB] Homogeneous GNN
- \item[\modC] Heterogeneous GNN \pause
+ \item[\modB] Homogeneous GNN to predict $ng$-graph
+ \item[\modC] Heterogeneous GNN to predict $ng$-graph \pause
 \item[\RF] Random Forest with engineered variables
 \end{itemize}
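One way the training data described above could be assembled: one sample per ordered customer pair (i, j) with the slide's engineered features, labelled 1 iff DNE put i into N_j. Field names (windows, travel_time, customers) are assumptions for illustration; the actual feature extraction is not part of this patch:

import numpy as np
from sklearn.ensemble import RandomForestClassifier

def pair_features(inst, i, j):
    (a_i, _), (_, b_j) = inst["windows"][i], inst["windows"][j]
    t_ij = inst["travel_time"][i][j]
    return [t_ij,                                # proximity of i and j
            b_j - (a_i + t_ij),                  # time-window slack of i -> j
            t_ij + inst["travel_time"][j][i]]    # duration of cycle i-j-i

def training_data(instances, dne_results):
    X, y = [], []
    for inst, N in zip(instances, dne_results):  # N[j] = DNE neighborhood of j
        for i in inst["customers"]:
            for j in inst["customers"]:
                if i != j:
                    X.append(pair_features(inst, i, j))
                    y.append(int(i in N[j]))     # classification target
    return np.array(X), np.array(y)

# rf = RandomForestClassifier().fit(*training_data(train_instances, train_N))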
@@ -1136,26 +1164,26 @@ Accuracy 0.895
 \caption{\RF, Accuracy 0.923}
 \end{figure}
 \end{frame}
-\begin{frame}{DNE-neighborhood vs learned neighborhood}
-
-
-Picture neighborhood vs learned neighborhood - are the learned rather sparse or have more 1-entries then necessary?
-%\red{Sensitivity means correctly guessed 1's right? Its impotant cause structure is sparse\\
-%\red{In distribution is test set and out distribution is e.g. solomon instances, instances completely different from trining and test set}
-% \begin{table}
-% \begin{tabular}{l l l}
-% \toprule
-% \textbf{Model} & \textbf{In-Distr Sensitivity} & \textbf{Out-Distr Sensitivity} \\
-% \midrule
-% Random Forest & 0.930 & 0.273 \\
-% Deep Class. + GNN encoding & 0.933 & 0.025 \\
-% Homogeneous NG encoding & 0.064 & 0.049 \\
-% Heterogeneous NG encoding & - & - \\
-% \bottomrule
-% \end{tabular}
-% \caption{Comparing sensitivity of the model with a selection of Solomon Instances.}
-% \end{table}
-\end{frame}
+%\begin{frame}{DNE-neighborhood vs learned neighborhood}
+%
+%
+%Picture neighborhood vs learned neighborhood - are the learned ones rather sparse or do they have more 1-entries than necessary?
+%%\red{Sensitivity means correctly guessed 1's, right? It's important because the structure is sparse\\
+%%\red{In-distribution is the test set and out-of-distribution is e.g. Solomon instances, instances completely different from training and test set}
+%% \begin{table}
+%% \begin{tabular}{l l l}
+%% \toprule
+%% \textbf{Model} & \textbf{In-Distr Sensitivity} & \textbf{Out-Distr Sensitivity} \\
+%% \midrule
+%% Random Forest & 0.930 & 0.273 \\
+%% Deep Class. + GNN encoding & 0.933 & 0.025 \\
+%% Homogeneous NG encoding & 0.064 & 0.049 \\
+%% Heterogeneous NG encoding & - & - \\
+%% \bottomrule
+%% \end{tabular}
+%% \caption{Comparing sensitivity of the model with a selection of Solomon Instances.}
+%% \end{table}
+%\end{frame}
 
 \subsection{Discussion}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -1174,7 +1202,7 @@
 \begin{frame}{Outlook}
 \begin{itemize}\setlength\itemsep{1em}
 \item Evaluate importance of input features
-\item Evaluate the learning success by using the learned neighborhood in a fully-fledged branch-price-and-cut algorithm.
+\item Evaluate the learning success by using the learned neighborhood in a fully-fledged branch-price-and-cut algorithm
 \item Extend the research on other VRP-variants
 \item Use Learning in other parts of the branch-price-and-cut algorithm
 \end{itemize}
-- 
GitLab
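A closing, purely hypothetical illustration of the outlook's first items: turning the trained pair-classifier into neighborhoods inside branch-price-and-cut by keeping, for each node j, the highest-scoring candidates (reusing pair_features from the sketch further up; max_size and threshold are invented knobs):

def learned_neighborhoods(inst, model, max_size=8, threshold=0.5):
    """Predict the ng-neighborhoods a priori instead of running DNE."""
    N = {j: set() for j in inst["customers"]}
    for j in inst["customers"]:
        cand = [i for i in inst["customers"] if i != j]
        proba = model.predict_proba([pair_features(inst, i, j) for i in cand])
        ranked = sorted(zip(proba[:, 1], cand), reverse=True)[:max_size]
        N[j] = {i for p, i in ranked if p >= threshold}
    return N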