From 9cedc21fe5d34047668c5b688cd685f466df0bb7 Mon Sep 17 00:00:00 2001
From: Grzegorz Daniluk <grzegorz.daniluk@cern.ch>
Date: Thu, 21 Jan 2016 16:14:05 +0100
Subject: [PATCH] doc/wrs_failures: merging procedures with snmp objects
 description

---
 doc/wrs_failures/snmp_exports.tex |  85 ++++--
 doc/wrs_failures/snmp_objects.tex | 463 +++++++++++++++++++++++-------
 doc/wrs_failures/wrs_failures.tex |  19 +-
 3 files changed, 438 insertions(+), 129 deletions(-)

diff --git a/doc/wrs_failures/snmp_exports.tex b/doc/wrs_failures/snmp_exports.tex
index a4164bcce..722cc46d0 100644
--- a/doc/wrs_failures/snmp_exports.tex
+++ b/doc/wrs_failures/snmp_exports.tex
@@ -1,23 +1,67 @@
-\section{SNMP exports}
+\section{SNMP diagnostics and solving problems}
 \label{sec:snmp_exports}
 This section describes SNMP objects exported by the WR Switch. Objects within
-the \texttt{WR\--SWITCH\--MIB} are divided into two categories:
+the \texttt{WR\--SWITCH\--MIB} are divided into two groups:
 \begin{itemize}
-  \item operator/basic objects (section \ref{sec:snmp_exports:basic}) -
-    providing basic status of the switch. It should be used by a control system
-    operators and people without a deep knowledge of the White Rabbit internals.
-    These values report a general status of the device and high level errors.
+  \item General status objects for operators (section
+    \ref{sec:snmp_exports:basic}) - provide a summary about the status of a
+    switch and several main subsystems (like timing, networking, OS). These
+    should be used by control system operators and users without a
+    comprehensive knowledge of the White Rabbit internals. These exports provide
+    a general status of the device and high level errors which is enough in most
+    cases to perform a quick repair.
 
-  \item expert/extended status objects (section \ref{sec:snmp_exports:expert}) -
+  \item Expert objects (section \ref{sec:snmp_exports:expert}) -
     can be used by White Rabbit experts for the in-depth diagnosis of the switch
-    failures. These values are verbose and should not be used by the operators.
+    failures. These values are verbose and normally should not be used by the
+    operators.
 \end{itemize}
 
-\subsection{Operator/basic objects}
+Description of the general status objects in section
+\ref{sec:snmp_exports:basic} includes also a list of actions to follow if a
+particular object reports an error. These repair procedures don't require any
+in-depth knowledge about White Rabbit. Independently of an error reported, there
+are some common remarks that apply to all situations:
+\begin{itemize}
+  \item Linux inside the WR Switch enumerates WR interfaces starting from 0.
+    This means we have to use internally port indexes 0..17. However, the
+    port numbers printed on the front panel are 1..18. Syslog messages
+    generated from the switch use the Linux port numbering. The consequence is
+    that every time Syslog says there is a problem on port X, this refers to
+    port index X+1 on the front panel of the switch.
+  \item If a procedure given for a specific SNMP object does not solve the
+    problem, please contact WR experts to perform a more in-depth analysis of your
+    network. For this, you should provide a complete dump of the WRS status
+    generated in the first step of each procedure.
+  \item The first action in most of the procedures below, named \emph{Dump
+    state}, simply requires calling a tool provided by WR developers that reads
+    all the detailed information from the switch and writes it to a single file
+    that can later be analyzed by the experts.\\
+    {\bf TODO: point to the tool once it's done}
+  \item If a repair procedure requires restarting or replacing a broken WR
+    Switch, please make sure that after the repair, all other WR devices
+    connected to the affected switch are synchronized and do not report any
+    problems.
+  \item If a procedure requires replacing the switch with a new unit, the broken
+    one should be handed over to WR experts or the switch manufacturer to
+    investigate the problem.
+\end{itemize}
+
+\subsection{General status objects for operators}
 \label{sec:snmp_exports:basic}
-This section describes the general status MIB objects that are calculated based
-on the other SNMP (detailed) exports. Most of the status objects described in
-this section can have one of the following values:
+This section describes the general status MIB objects that represent the overall
+status of a device and its subsystems. They are organized in a tree structure
+(fig.~\ref{fig:snmp_oper}) where each object reports a problem based on the
+status of its child objects. SNMP objects in the third layer of this tree are
+calculated based on the SNMP expert objects. Most of the status objects
+described in this section can have one of the following values:
+\begin{figure}[ht]
+  \begin{center}
+    \includegraphics[width=.8\textwidth]{img/snmp_obj.pdf}
+    \caption{The structure of general status objects for operators}
+    \label{fig:snmp_oper}
+  \end{center}
+\end{figure}
 \begin{itemize}%[leftmargin=0pt]
   \item \texttt{NA} -- status value was not calculated at all (returned value
     is 0). Something bad has happened.
@@ -36,33 +80,34 @@ this section can have one of the following values:
     object. If you see this please report to WR developers.
 \end{itemize}
 
-\noindent {\bf General Status objects}:
+\paragraph*{SNMP objects:}
 
 % SNMP status objects
 \printnoidxglossary[type=snmp_status,title=,style=objtree,sort=def]
 
 \newpage
-\subsection{Expert/extended status}
+\subsection{Expert objects}
 \label{sec:snmp_exports:expert}
 
-\noindent {\bf Expert Status}:
+\paragraph*{SNMP objects:}
 
 % SNMP expert objects
 \printnoidxglossary[type=snmp_expert,style=objtree,sort=def]
 
-\subsection{Other's MIB objects}
-\label{sec:snmp_exports:others}
+%\subsection{Other's MIB objects}
+%\label{sec:snmp_exports:others}
 
+\vspace{12pt}
 \noindent {\bf Objects from other MIBs}:
 
 % other objects
 \printnoidxglossary[type=snmp_other,style=objtree,sort=def]
 
-\subsection{Sorted list of MIB objects}
-\label{sec:snmp_exports:sorted}
+%\subsection{Sorted list of MIB objects}
+%\label{sec:snmp_exports:sorted}
 
 % print alphabetical list
-\printnoidxglossary[type=snmp_all,style=tree,sort=letter]
+%\printnoidxglossary[type=snmp_all,style=tree,sort=letter]
 
 %%%%%%%%%%%%%%%%%%5
 %% Other notes
diff --git a/doc/wrs_failures/snmp_objects.tex b/doc/wrs_failures/snmp_objects.tex
index a306abeab..d0a7522cf 100644
--- a/doc/wrs_failures/snmp_objects.tex
+++ b/doc/wrs_failures/snmp_objects.tex
@@ -3,128 +3,379 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % Add status entries in the order as the appear in the MIB
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\snmpentrys{WR-SWITCH-MIB}{}{wrsGeneralStatusGroup} {\\
-  Group containing collective statuses
-  of various subsystems and the main system status, describing the status of
-  entire switch.}
+\snmpentrys{WR-SWITCH-MIB}{}{wrsGeneralStatusGroup}{
+  Group containing collective status of the switch and its various
+  subsystems.}
 
-  \snmpentrys{WR-SWITCH-MIB}{wrsGeneralStatusGroup}{wrsMainSystemStatus} {\\
+  \snmpentrys{WR-SWITCH-MIB}{wrsGeneralStatusGroup}{wrsMainSystemStatus}{
     WRS general status of a switch can be \texttt{OK}, \texttt{Warning} or
-    \texttt{Error}. When there is an error or warning please check the values of
+    \texttt{Error}. In case of an error or warning, please check the values of
     \texttt{wrsOSStatus}, \texttt{wrsTimingStatus} and
     \texttt{wrsNetworkingStatus} to find out which subsystem causes the problem.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsGeneralStatusGroup}{wrsOSStatus} {\\
-    Collective status of the \texttt{wrsOSStatusGroup}. For details please check
-    the group's content.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsGeneralStatusGroup}{wrsTimingStatus} {\\
-    Collective status of the \texttt{wrsTimingStatusGroup}. For details please
-    check the group's content.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsGeneralStatusGroup}{wrsNetworkingStatus} {\\
-    Collective status of the \texttt{wrsNetworkingStatusGroup}. For details
-    please check the group's content.}
-
-\snmpentrys{WR-SWITCH-MIB}{}{wrsDetailedStatusesGroup} {\\
+  \snmpentrys{WR-SWITCH-MIB}{wrsGeneralStatusGroup}{wrsOSStatus}{
+    Collective status of the operating system running on WR switch. In case of
+    an error or warning, please check status objects in the
+    \texttt{wrsOSStatusGroup}.}
+  \snmpentrys{WR-SWITCH-MIB}{wrsGeneralStatusGroup}{wrsTimingStatus}{
+    Collective status of the synchronization subsystem. In case of an
+    error or warning, please check status objects in the
+    \texttt{wrsTimingStatusGroup}.}
+  \snmpentrys{WR-SWITCH-MIB}{wrsGeneralStatusGroup}{wrsNetworkingStatus}{
+    Collective status of the Ethernet switching subsystem. In case of an error
+    or warning, please check status objects in the
+    \texttt{wrsNetworkingStatusGroup}.\vspace{12pt}}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\snmpentrys{WR-SWITCH-MIB}{}{wrsDetailedStatusesGroup}{
   Branch with collective statuses of various switch subsystems.}
 
-  \snmpentrys{WR-SWITCH-MIB}{wrsDetailedStatusesGroup}{wrsOSStatusGroup} {\\
+  %------------------------------------------------------------------------
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsDetailedStatusesGroup}{wrsOSStatusGroup}{
+    \underline{Description:}
     Group with collective statuses of the embedded operating system running on
     the switch.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsOSStatusGroup}{wrsBootSuccessful} {\\
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsOSStatusGroup}{wrsBootSuccessful}{
+    \underline{Description:}
     Grouped status of \texttt{wrsBootStatusGroup}, indicating whether boot was
     successful. \texttt{Error} when dot-config source is wrong, unable to get
     the dot-config, unable to get URL to the dot-config, dot-config contains
     errors, unable to read the hwinfo, unable to load the FPGA bitstream, unable
     to load the LM32 software, any kernel modules or userspace daemons are
-    missing.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsOSStatusGroup}{wrsTemperatureWarning} {\\
-    Report whether the temperature thresholds are not set or are exceeded.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsOSStatusGroup}{wrsMemoryFreeLow} {\\
-    \texttt{Warning} when 50\% of the memory is used, error when more than 80\%
-    of the memory is used.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsOSStatusGroup}{wrsCpuLoadHigh} {\\
-    \texttt{Warning} when the average CPU load is more than 2 for the past 1min,
+    missing.\\
+    \underline{On error:}
+    \begin{pck_proc}
+      \item Dump state
+      \item Check \texttt{wrsBootConfigStatus}, if it reports an
+        error, please verify your WRS configuration.
+      \item Restart the switch
+      \item Please consult WR experts if the problem persists.
+    \end{pck_proc}
+    \glspar \underline{Related problems:}\vspace{12pt}}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsOSStatusGroup}{wrsTemperatureWarning}{
+    \underline{Description:}
+    Reports whether the temperature thresholds are not set or are exceeded.\\
+    \underline{On error:}
+    \begin{pck_proc}
+      \item Dump state
+      \item Verify if your switch configuration contains valid temperature
+        thresholds. By default, they are all set to 80 \textdegree C.
+      \item Verify if cooling of the rack where WR Switch is installed works
+        properly.
+      \item Verify if both cooling fans in the back of the WR Switch case are
+        working.
+      \item Replace the switch with a new unit and consult the WR Switch
+        manufacturer for a repair.
+    \end{pck_proc}
+    \glspar \underline{Related problems:}\vspace{12pt}}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsOSStatusGroup}{wrsMemoryFreeLow}{
+    \underline{Description:}
+    Reports \texttt{Warning} when more than 50\%, or \texttt{Error} when more
+    than 80\% of the memory is used.\\
+    \underline{On error:}
+    \begin{pck_proc}
+      \item Dump state
+      \item Restart the switch
+      \item Send the dumped state of the switch to WR experts for analysis as
+        this might mean there is some internal problem in the WRS firmware.
+    \end{pck_proc}
+    \glspar \underline{Related problems:}\vspace{12pt}}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsOSStatusGroup}{wrsCpuLoadHigh}{
+    \underline{Description:}
+    Reports \texttt{Warning} when the average CPU load is more than 2 for the past 1min,
     1.5 for 5min or 1 for 15min. \texttt{Error} when the average CPU load is
-    more than 3 for the past 1min, 2 for 5min or 1.5 for 15min.}
-	\snmpentrys{WR-SWITCH-MIB}{wrsOSStatusGroup}{wrsDiskSpaceLow} {\\
+    more than 3 for the past 1min, 2 for 5min or 1.5 for 15min.\\
+    \underline{On error:}
+    \begin{pck_proc}
+      \item Dump state
+      \item Restart the switch
+      \item Send the dumped state of the switch to WR experts for analysis as
+        this might mean there is some internal problem in the WRS firmware.
+    \end{pck_proc}
+    \glspar \underline{Related problems:}\vspace{12pt}}
+
+	\snmpentrys{WR-SWITCH-MIB}{wrsOSStatusGroup}{wrsDiskSpaceLow}{
+    \underline{Description:}
     \texttt{Warning} when more than 80\% of any disk partition is used.
-    \texttt{Error} when more than 90\% of any disk partition is used.}
-
-  \snmpentrys{WR-SWITCH-MIB}{wrsDetailedStatusesGroup}{wrsTimingStatusGroup} {\\
-    Group with collective statuses of the timing subsystem.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsTimingStatusGroup}{wrsPTPStatus} {\\
-    \texttt{Error} when any of PTP error counters in
-    \texttt{wrsPtpDataTable} (\texttt{wrsPtpServoStateErrCnt},
-    \texttt{wrsPtpClockOffsetErrCnt} or\\ \texttt{wrsPtpRTTErrCnt}) has
-    increased since the last scan (issue
+    \texttt{Error} when more than 90\% of any disk partition is used.\\
+    \underline{On error:}
+    \begin{pck_proc}
+      \item Dump state
+      \item Check the values of \emph{CONFIG\_WRS\_LOG\_*} configuration options
+        on the switch. These are the parameters describing where log messages
+        should be sent from various processes in the switch. Normally users
+        don't need to modify them, but if any of them is set to a file in the
+        WRS filesystem (e.g. /tmp/snmp.log) this may reduce the free space after
+        some time of operation.
+      \item Restart the switch
+      \item Send the dumped state of the switch to WR experts for analysis as
+        this might mean there is some internal problem in the WRS firmware.
+    \end{pck_proc}
+    \glspar \underline{Related problems:}\vspace{12pt}}
+
+  %------------------------------------------------------------------------
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsDetailedStatusesGroup}{wrsTimingStatusGroup} {
+    \underline{Description:}
+    Group with collective statuses of the timing subsystem.} %\vspace{12pt}}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsTimingStatusGroup}{wrsPTPStatus}{
+    \underline{Description:}
+    Reports the status of PTP daemon running on the switch.\\
+    \texttt{Error} when any of PTP error counters in \texttt{wrsPtpDataTable}\\
+    (\texttt{wrsPtpServoStateErrCnt}, \texttt{wrsPtpClockOffsetErrCnt} or\\
+    \texttt{wrsPtpRTTErrCnt}) has increased since the last scan (issue
     \ref{fail:timing:ppsi_track_phase}, \ref{fail:timing:offset_jump},
     \ref{fail:timing:rtt_jump}), at least one of the $\Delta_{TXM}$,
     $\Delta_{RXM}$, $\Delta_{TXS}$, $\Delta_{RXS}$ is 0 (issue
     \ref{fail:timing:deltas_report}) or PTP servo update counter is not
-    increasing.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsTimingStatusGroup}{wrsSoftPLLStatus} {\\
+    increasing.\\
+    \underline{On error:}
+    \begin{pck_proc}
+      \item Dump state
+      \item Check \texttt{wrsSoftPLLStatus} on the Master (WR device one step
+        higher in the timing hierarchy). If it reports a problem, investigate
+        the Master switch first. Otherwise, continue with the primary WRS.
+      \item Verify if the link to WR Master was not lost by checking the
+        object\\ \texttt{wrsSlaveLinksStatus}.
+      \item If this is not the case, restart the switch.
+      \item If the problem persists replace the switch with a new unit.
+        %(see \ref{cern:wrs_replacement}).
+    \end{pck_proc}
+    \glspar \underline{Related problems:}\vspace{12pt}}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsTimingStatusGroup}{wrsSoftPLLStatus}{
+    \underline{Description:}
+    Reports the status of the PLLs inside the switch.\\
     \texttt{Error} when \texttt{wrsSpllSeqState} is not \emph{Ready}, or
     \texttt{wrsSpllAlignState} is not \emph{Locked} (for Grand Master mode), or
-    any of \texttt{wrsSpllHlock}, \texttt{wrsSpllMlock} equals to 0 (for Slave
-    mode) (issue \ref{fail:timing:spll_unlock}).\\
+    any of \texttt{wrsSpllHlock}, \texttt{wrsSpllMlock} equals to 0 (for
+    Boundary Clock mode).\\
     \texttt{Warning} when \texttt{wrsSpllDelCnt} $>$ 0 (for Grand Master mode)
-    or \texttt{wrsSpllDelCnt} has changed (for all other modes).}
-  \snmpentrys{WR-SWITCH-MIB}{wrsTimingStatusGroup}{wrsSlaveLinksStatus} {\\
-    \texttt{Error} when link to Master is down for a switch in the Slave mode
-    (issue \ref{fail:timing:master_down}). Additionally, \texttt{Error} when the
-    link to Master is up for a switch in the Free-running Master or Grand
-    Master mode.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsTimingStatusGroup}{wrsPTPFramesFlowing} {\\
-    \texttt{Error} when PTP Tx/Rx frame counters on active links (Master / Slave
-    ports) are not being incremented. Report the first run.}
-	      
-  \snmpentrys{WR-SWITCH-MIB}{wrsDetailedStatusesGroup}{wrsNetworkingStatusGroup} {\\
-    Group with collective statuses of the networking subsystem.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsNetworkingStatusGroup}{wrsSFPsStatus} {\\
+    or \texttt{wrsSpllDelCnt} has changed (for all other modes).\\
+    \underline{On error:}\\
+    For GrandMaster WRS:
+    \begin{pck_proc}
+      \item Dump state
+      \item Check 1-PPS and 10 MHz signals coming from an external source.
+        Verify if they are properly connected and, in case of GPS receiver,
+        check if it is synchronized and locked.
+      \item Restart the GrandMaster switch.
+      \item If the problem persists, replace the switch with a new unit.
+        %(see \ref{cern:wrs_replacement}).
+    \end{pck_proc}
+    \glspar For Boundary Clock WRS:
+    \begin{pck_proc}
+      \item Dump state
+      \item Check \texttt{wrsSoftPLLStatus} on the Master. If it reports a
+        problem, proceed to investigate the problem on the Master switch.
+      \item Verify if the link to WR Master was not lost by checking the
+        object\\ \texttt{wrsSlaveLinksStatus}.
+      \item Restart the switch.
+      \item If the problem persists, replace the switch with a new unit.
+        %(see \ref{cern:wrs_replacement}).
+    \end{pck_proc}
+    \glspar \underline{Related problems:}\vspace{12pt}}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsTimingStatusGroup}{wrsSlaveLinksStatus}{
+    \underline{Description:}
+    Reports the status of the link on WR ports configured to slave role.\\
+    \texttt{Error} when link to master is down for a switch in the Boundary
+    Clock mode. Additionally, \texttt{Error} is generated when the
+    link to master is up for a switch in the Free-running Master or Grand
+    Master mode.\\
+    \underline{On error:}\\
+    For Master/GrandMaster WRS:
+    \begin{pck_proc}
+      \item Check the configuration of the switch, especially whether the
+        \emph{Timing Mode} is correctly set (i.e.\ that it was not accidentally
+        set to \emph{Boundary Clock}).
+      \item Check the role of each port timing configuration. They should be all
+        set to \emph{master}. If any of them is set to \emph{slave} you should
+        verify if there is no WR Master connected to it.
+    \end{pck_proc}
+    \glspar For Boundary Clock WRS:
+    \begin{pck_proc}
+      \item Check the fiber connection on the slave port of the WRS.
+      \item Check the configuration of the switch, especially whether the
+        \emph{Timing Mode} is correctly set (i.e.\ that it was not accidentally
+        set to \emph{Grand-Master} or \emph{Free-Running Master}).
+      \item Check the status of the WR Master connected to the slave port of the
+        WRS.
+      \item Replace the faulty switch with a new unit. If this does not solve
+        the problem, make sure your fiber link is not broken.
+    \end{pck_proc}
+    \glspar \underline{Related problems:}\vspace{12pt}}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsTimingStatusGroup}{wrsPTPFramesFlowing}{
+    \underline{Description:}
+    Reports \texttt{Error} when PTP frames are not being sent and received on
+    active WR ports - Tx/Rx frame counters on active links (master / slave
+    ports) are not being incremented. Reports also \texttt{FirstRead} value.\\
+    \underline{On error:}
+    \begin{pck_proc}
+      \item Check Syslog messages to determine the WR port on which the
+        problem is reported. You should see a message similar to this one:\\
+        \texttt{SNMP: wrsPTPFramesFlowing failed for port 1}
+      \item Check your network layout and the WR Switch configuration. If you
+        have some non-WR devices connected to ports of the WR Switch (e.g.
+        computer sending/receiving only data, without the need of
+        synchronization), these ports should have their role in the timing
+        configuration set to \emph{non-wr}.
+      \item Check the status of a WR device connected to the reported port.
+      \item Restart the switch.
+      \item If the problem persists, please contact WR experts for in-depth
+        investigation.
+    \end{pck_proc}
+    \glspar \underline{Related problems:}\vspace{12pt}}
+
+  %------------------------------------------------------------------------
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsDetailedStatusesGroup}{wrsNetworkingStatusGroup}{
+    \underline{Description:}
+    Group with collective statuses of the networking subsystem.\vspace{12pt}}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsNetworkingStatusGroup}{wrsSFPsStatus}{
+    \underline{Description:}
+    Reports the status of SFP transceivers inserted into the switch.\\
     \texttt{Error} when any of the SFPs reports an error. To find out which SFP
-    caused the problem check \texttt{wrsPortStatusSfpError.<n>}.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsNetworkingStatusGroup}{wrsEndpointStatus} {\\
+    caused the problem check \texttt{wrsPortStatusSfpError.<n>}.\\
+    \underline{On error:}
+    \begin{pck_proc}
+      \item Check \texttt{wrsPortStatusSfpError.*} SNMP objects or Syslog
+        messages to determine the WR port on which the problem is reported. In
+        case of Syslog, you should see a message similar to this one:\\
+        \texttt{Unknown SFP vn="AVAGO" pn="ABCU-5710RZ" vs="AN1151PD8A" on port
+        wr1}
+      \item If the reported port is intended to be used to connect a device that
+        does not require WR synchronization (e.g. using a copper SFP module),
+        then you should verify whether the role in the timing configuration for
+        this port is set to \emph{non-wr}.
+      \item Otherwise, you should use a WR-supported SFP module and make sure it
+        is declared together with calibration values in the WRS configuration.
+    \end{pck_proc}
+    \glspar \underline{Related problems:}\vspace{12pt}}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsNetworkingStatusGroup}{wrsEndpointStatus}{
+    \underline{Description:}
+    Reports the status of Ethernet MAC endpoints on WR ports.\\
     \texttt{Error} when there is a fault in the Endpoint's
-    transmission/reception path (issue \ref{fail:data:ep_txrx}).}
-  \snmpentrys{WR-SWITCH-MIB}{wrsNetworkingStatusGroup}{wrsSwcoreStatus} {\\
-    Not used in the current release. Always reports \texttt{OK}.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsNetworkingStatusGroup}{wrsRTUStatus} {\\
-    \texttt{Error} when RTU is full and cannot accept more requests (issue
-    \ref{fail:data:rtu_full}).}
-
-  \snmpentrys{WR-SWITCH-MIB}{wrsDetailedStatusesGroup}{wrsVersionGroup} {\\
+    transmission/reception path.\\
+    \underline{On error:}
+    \begin{pck_proc}
+      \item Make several state dumps.
+      \item Restart the switch.
+      \item Check Syslog messages to determine the WR port on which the problem
+        is reported. You should see a message similar to this one:\\
+        \texttt{SNMP: wrsEndpointStatus failed for port 1}
+      \item Check the fiber link on a reported port, i.e. try replacing SFP
+        transceivers on both sides of the link, try using another fiber.
+      \item If the problem persists, please contact WR experts for in-depth
+        investigation.
+    \end{pck_proc}
+    \glspar \underline{Related problems:}\vspace{12pt}}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsNetworkingStatusGroup}{wrsSwcoreStatus}{
+    \underline{Description:}
+    Reports the status of the Ethernet switching module.\\
+    Status object not implemented in the current firmware release. Always
+    reports \texttt{OK}.\\
+    \underline{On error:}
+    \begin{pck_proc}
+      \item Dump state.
+      \item Restart the switch.
+      \item Please contact WR experts since this might mean that either there is
+        too much high priority traffic in your network, or there is some
+        internal problem in the WRS firmware.
+    \end{pck_proc}
+    \glspar \underline{Related problems:}\vspace{12pt}}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsNetworkingStatusGroup}{wrsRTUStatus}{
+    \underline{Description:}
+    Reports the status of the routing module responsible for deciding where (to
+    which port) incoming Ethernet frames should be forwarded.\\
+    \texttt{Error} when RTU is overloaded and cannot accept more requests.\\
+    \underline{On error:}
+    \begin{pck_proc}
+      \item Dump state
+      \item Restart the switch.
+      \item If possible, try reducing the load of small Ethernet frames flowing
+        through your switch. If possible in your application, try using larger
+        Ethernet frames with lower load to transfer information.
+    \end{pck_proc}
+    \glspar \underline{Related problems:}\vspace{12pt}}
+
+  %------------------------------------------------------------------------
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsDetailedStatusesGroup}{wrsVersionGroup}{
+    \underline{Description:}
     Hardware, gateware and software versions. Additionally the serial number and
     other hardware information for the WRS.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionSwVersion} {\\
-    software version (as returned from the \texttt{git describe} at build
-    time).}
-  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionSwBuildBy} {\\
-    software build-by (as returned from the \texttt{git config --get-all
-    user.name} at build time).}
-  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionSwBuildDate} {\\
-    software build date (\texttt{\_\_DATE\_\_} at build time).}
-  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionBackplaneVersion} {\\
-    hardware version of the minibackplane PCB.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionFpgaType} {\\
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionSwVersion}{
+    \underline{Description:}
+    Software version in the form of a release version and, if applicable, the
+    git commit from the repository (information provided by the \emph{git
+    describe} command at build time).}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionSwBuildBy}{
+    \underline{Description:}
+    Information about who built the firmware running on the switch (provided
+    by the \texttt{git config --get-all user.name} command at build time).}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionSwBuildDate}{
+    \underline{Description:}
+    Firmware build date (\texttt{\_\_DATE\_\_} at build time).}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionBackplaneVersion}{
+    \underline{Description:}
+    Hardware version of the Minibackplane board.}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionFpgaType}{
+    \underline{Description:}
     FPGA model inside the switch.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionManufacturer} {\\
-    name of the manufacturing company.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionSwitchSerialNumber} {\\
-    serial number (or string) of the switch.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionScbVersion} {\\
-    version of the SCB (the motherboard).}
-  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionGwVersion} {\\
-    version of the gateware (FPGA bitstream).}
-  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionGwBuild} {\\
-    build ID of the gateware (FPGA bitstream).}
-  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionSwitchHdlCommitId} {\\
-    gateware version: commit ID from the \texttt{wr\_switch\_hdl} repository.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionGeneralCoresCommitId} {\\
-    gateware version: commit ID from the \texttt{general-cores} repository.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionWrCoresCommitId} {\\
-    gateware version: commit ID from the \texttt{wr-cores} repository.}
-  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionLastUpdateDate} {\\
-    date and time of last firmware update, this information may not be accurate,
-    due to hard restarts or lack of the proper time at update.}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionManufacturer}{
+    \underline{Description:}
+    Name of the manufacturing company.}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionSwitchSerialNumber}{
+    \underline{Description:}
+    Serial number of the switch.}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionScbVersion}{
+    \underline{Description:}
+    Hardware version of the main SCB board.}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionGwVersion}{
+    \underline{Description:}
+    Version of the FPGA bitstream (Gateware).}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionGwBuild}{
+    \underline{Description:}
+    Build ID of the FPGA bitstream -- the synthesis date.}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionSwitchHdlCommitId}{
+    \underline{Description:}
+    FPGA bitstream commit ID from the main \texttt{wr\_switch\_hdl} repository.}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionGeneralCoresCommitId}{
+    \underline{Description:}
+    FPGA bitstream commit ID from the \texttt{general-cores} repository.}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionWrCoresCommitId}{
+    \underline{Description:}
+    FPGA bitstream commit ID from the \texttt{wr-cores} repository.}
+
+  \snmpentrys{WR-SWITCH-MIB}{wrsVersionGroup}{wrsVersionLastUpdateDate}{
+    \underline{Description:}
+    Date and time of the last firmware update. This information may not be
+    accurate due to hard restarts or lack of the proper time during the
+    upgrade.}
 
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -145,17 +396,17 @@
   \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsFaultLR}{Not implemented}
   \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsConfigSource}{}
   \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsConfigSourceUrl}{}
-  \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsRestartReasonMonit}{\\
+  \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsRestartReasonMonit}{
     Process that caused \texttt{monit} to trigger a restart.}
   \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsBootConfigStatus}{}
   \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsBootHwinfoReadout}{}
   \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsBootLoadFPGA}{}
   \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsBootLoadLM32}{}
-  \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsBootKernelModulesMissing}{\\
+  \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsBootKernelModulesMissing}{
     List of kernel modules is defined in the source code.}
-  \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsBootUserspaceDaemonsMissing}{\\
+  \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsBootUserspaceDaemonsMissing}{
     List of daemons is defined in the source code.}
-  \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsGwWatchdogTimeouts}{\\
+  \snmpentrye{WR-SWITCH-MIB}{wrsBootStatusGroup}{wrsGwWatchdogTimeouts}{
     Number of times the watchdog has restarted the HDL module responsible for
     the Ethernet switching process}
 
@@ -180,7 +431,7 @@
   \snmpentrye{WR-SWITCH-MIB}{wrsCpuLoadGroup}{wrsCPULoadAvg5min}{}
   \snmpentrye{WR-SWITCH-MIB}{wrsCpuLoadGroup}{wrsCPULoadAvg15min}{}
 
-  \snmpentrye{WR-SWITCH-MIB}{wrsOperationStatus}{wrsDiskTable}{\\
+  \snmpentrye{WR-SWITCH-MIB}{wrsOperationStatus}{wrsDiskTable}{
     Table with a row for every partition.}
   \snmpentrye{WR-SWITCH-MIB}{wrsDiskTable}{wrsDiskIndex.<n>}{}
   \snmpentrye{WR-SWITCH-MIB}{wrsDiskTable}{wrsDiskMountPath.<n>}{}
@@ -269,11 +520,11 @@
 
   \snmpentrye{WR-SWITCH-MIB}{}{wrsPtpDataTable}{Table with a row per PTP servo instance.}
   \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpIndex.<n>}{}
-  \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpPortName.<n>}{\\
+  \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpPortName.<n>}{
     The port on which the instance is running.}
-  \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpGrandmasterID.<n>}{\\
+  \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpGrandmasterID.<n>}{
     Not implemented.}
-  \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpOwnID.<n>}{\\
+  \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpOwnID.<n>}{
     Not implemented.}
   \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpMode.<n>}{}
   \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpServoState.<n>}{}
@@ -290,12 +541,12 @@
   \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpDeltaRxM.<n>}{}
   \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpDeltaTxS.<n>}{}
   \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpDeltaRxS.<n>}{}
-  \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpServoStateErrCnt.<n>}{\\
+  \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpServoStateErrCnt.<n>}{
     Number of the servo updates when servo is out of the TRACK\_PHASE.}
-  \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpClockOffsetErrCnt.<n>}{\\
+  \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpClockOffsetErrCnt.<n>}{
     Number of servo updates when offset is larger than 500ps or smaller than
     -500ps.}
-  \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpRTTErrCnt.<n>}{\\
+  \snmpentrye{WR-SWITCH-MIB}{wrsPtpDataTable}{wrsPtpRTTErrCnt.<n>}{
     Number of servo updates when RTT delta between subsequent updates is larger
     than 1000ps or smaller than -1000ps.}
 
diff --git a/doc/wrs_failures/wrs_failures.tex b/doc/wrs_failures/wrs_failures.tex
index 4a746d7e3..eba4f15f9 100644
--- a/doc/wrs_failures/wrs_failures.tex
+++ b/doc/wrs_failures/wrs_failures.tex
@@ -12,6 +12,7 @@
 \usepackage[latin1]{inputenc}
 \usepackage{verbatim}
 \usepackage{amsmath}
+\usepackage{textcomp}
 \usepackage{times,mathptmx}
 \usepackage{chngcntr}
 \usepackage{hyperref}
@@ -51,6 +52,13 @@
 	\setlength{\parsep}{0pt}
 }{\end{itemize}}
 
+\newenvironment{pck_proc}{ % pck_proc: wrapper around enumerate with explicit spacing lengths
+\begin{enumerate}[topsep=2pt] % key=value optional argument; presumably relies on enumitem being loaded -- TODO confirm
+	\setlength{\itemsep}{1pt} % set \itemsep to 1pt inside this list
+	\setlength{\parskip}{0pt} % set \parskip to 0pt inside this list
+	\setlength{\parsep}{0pt} % set \parsep to 0pt inside this list
+}{\end{enumerate}} % end code: closes the enumerate opened in the begin code
+
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % creating subsubsubsection notation
@@ -252,7 +260,12 @@
 %\bibliographystyle{unsrt}
 %\bibliography{references}
 
-
+\appendix
+\newpage
+\section{Sorted list of all MIB objects}
+\label{sec:snmp_exports:sorted}
+% print alphabetical list
+\printnoidxglossary[type=snmp_all,style=tree,sort=letter]
 
 
 % add not used entries, but don't display their's section
@@ -263,7 +276,7 @@
   \ifglsused{\thislabel}{}{\glsadd[format=ignore]{\thislabel}}%
 }
 
-\newpage
-\input{procedures.tex}
+%\newpage
+%\input{procedures.tex}
 
 \end{document}
-- 
GitLab