#!/bin/sh
# -*- sh -*-

: << =cut

=head1 NAME

lxc_guests - collect statistics about containers virtualized via LXC

=head1 CONFIGURATION

  [lxc_guests]
    user root
    group root

    # The memory usage of containers is by default drawn as a stacked area
    # chart. Alternatively a non-stacked graph with lines can be configured.
    # Default: true
    #env.ram_display_stacked true
    
    # The cpu usage in percent of containers is by default drawn as a stacked
    # area chart. Alternatively a non-stacked graph with lines can be
    # configured.
    # Default: true
    #env.cpu_usage_display_stacked true

    # lxc container path, default below
    #env.lxcpath /var/lib/lxc

    # exclude the following containers
    # (default none excluded)
    #env.exclude container1 container2

=head1 INTERPRETATION

This version of the plugin replaces the old lxc_guests plugin and works 
with newer lxc versions, with cgroup version 2 and systemd (systemd-cgtop is 
used to get cpu usage). Use an older revision of this plugin for systems with 
cgroup version 1.

This plugin needs root (user and group) privilege.

This plugin has been tested with lxc 4 and lxc 5 (on Debian bullseye and
Debian bookworm, respectively).

For the logins graph, the "users" command is required in each container and 
user/group has to be set to root for lxc-attach.

=head1 AUTHOR

vajtsz vajtsz@gmail.com
mitty  mitty@mitty.jp
alphanet schaefer@alphanet.ch (many changes and multigraph)
Lars Kruse <devel@sumpfralle.de>
Sebastian L. <https://momou.ch>

=head1 LICENSE

2-clause BSD License
or GPLv3 license or later, at your option

=head1 MAGIC MARKERS

 #%# family=auto
 #%# capabilities=autoconf

=cut

# Abort on failing commands (-e) and on the use of unset variables (-u).
set -eu


# Load the shell helper library from the munin library directory.
# NOTE(review): "clean_fieldname" (used by do_config/do_fetch) is not defined
# in this file and is presumably provided by this library - confirm.
. "$MUNIN_LIBDIR/plugins/plugin.sh"


# Plugin settings: every value keeps whatever the environment already provides
# (see the "env." settings in the CONFIGURATION section above) and only falls
# back to its default if it is unset or empty.

# directory containing the lxc containers
: "${lxcpath:=/var/lib/lxc}"
# containers to be ignored
: "${exclude:=}"
# draw the memory graph as stacked areas ("true") or as lines
: "${ram_display_stacked:=true}"
# draw the cpu usage (%) graph as stacked areas ("true") or as lines
: "${cpu_usage_display_stacked:=true}"

# --- FUNCTIONS

# Print the names of all running containers that are not excluded.
# Arguments: $1 - optional whitespace separated list of container names to skip
# Outputs:   one container name per line on stdout
get_active_guests() {
   local excludes="${1:-}"
   local guest_name excluded_name is_excluded
   for guest_name in $(lxc-ls)
   do
      # Compare against every entry of the exclude list literally. A word
      # based grep would treat "-" or "." as word boundaries, so excluding
      # "web-1" would also hide a container called "web".
      is_excluded=0
      for excluded_name in $excludes
      do
         if [ "$excluded_name" = "$guest_name" ]; then
            is_excluded=1
            break
         fi
      done
      if [ "$is_excluded" -eq 1 ]; then
         continue
      fi
      # errors of lxc-info are hidden on purpose: such a container is simply
      # not reported as running
      if lxc-info -n "$guest_name" --state 2>/dev/null | grep -qw RUNNING; then
         echo "$guest_name"
      fi
   done
}


# Query one cgroup setting of a container via "lxc-cgroup".
# Arguments: $1 - container name
#            $2 - name of the cgroup field (e.g. "cpu.stat")
# Outputs:   the output of lxc-cgroup (its log is sent to stdout, too), without
#            the lines containing "set_config_idmaps"
get_lxc_cgroup_info() {
   local container cgroup_key
   container="$1"
   cgroup_key="$2"
   lxc-cgroup -o /dev/stdout -l INFO -n "$container" "$cgroup_key" \
      | grep -v set_config_idmaps
}


# Count the processes of a container.
# Arguments: $1 - container name (nothing is printed if it is empty)
# Outputs:   the number of lines found in all "cgroup.procs" files below the
#            cgroup directory "lxc.payload.<name>" of the container
lxc_count_processes () {
   local container="$1"
   local proc_total

   if [ -z "$container" ]; then
      return 0
   fi

   proc_total=$(find /sys/fs/cgroup/lxc.payload."$container"/ -name cgroup.procs -exec cat {} \; | wc -l)
   [ -z "$proc_total" ] || echo "$proc_total"
}


# Change the first character of a string to upper case.
# Arguments: $1 - the text to convert
# Outputs:   the converted text on stdout (without a trailing newline)
title_case() {
   local text="$1"
   local first rest
   # Split the text via parameter expansion instead of "echo | cut": "echo"
   # swallows option-like input (e.g. "-n") and may interpret backslashes.
   rest=${text#?}
   first=${text%"$rest"}
   printf "%s%s" "$(printf "%s" "$first" | tr "[:lower:]" "[:upper:]")" "$rest"
}


# Tell munin whether this plugin can be used on this host ("autoconf" action).
# Globals:   lxcpath (read)
# Outputs:   "yes", or "no (<reason>)" on stdout
do_autoconf() {
   if [ ! -r /proc/net/dev ]; then
      echo "no (/proc/net/dev cannot be read)"
   elif [ ! -e "$lxcpath" ]; then
      echo "no ($lxcpath is not present)"
   # "command -v" is specified by POSIX, whereas "which" is an external tool
   # that is not necessarily installed (lxc-ls would then wrongly be reported
   # as missing)
   elif ! command -v lxc-ls >/dev/null 2>&1; then
      echo "no ('lxc-ls' is not available in PATH)"
   else
      echo yes
   fi
}


# Print the munin graph configuration ("config" action) of all active guests.
#
# Globals:   exclude, cpu_usage_display_stacked, ram_display_stacked (read)
# Outputs:   one "multigraph" section per graph on stdout, each followed by
#            the field definitions of every running, non-excluded container
do_config() {
   local active_guests guest_name cpu_usage field field_down field_up device
   local cpu_draw_style ram_draw_style NCPU graphlimit megabit_per_second bps
   active_guests=$(get_active_guests "$exclude")

   # The draw styles do not depend on the guest: determine them only once.
   if [ "$cpu_usage_display_stacked" != "true" ]; then
      cpu_draw_style="LINE1"
   else
      cpu_draw_style="AREASTACK"
   fi
   if [ "$ram_display_stacked" != "true" ]; then
      ram_draw_style="LINE1"
   else
      ram_draw_style="AREASTACK"
   fi

   # NOTE(review): the label says USER_HZ, but do_fetch reads the "*_usec"
   # values of cpu.stat - confirm the intended unit.
   cat <<EOF
multigraph lxc_cpu
graph_title CPU Usage
graph_args -l 0 --base 1000
graph_vlabel USER_HZ
graph_category virtualization
EOF

   for guest_name in $active_guests
   do
      for cpu_usage in user system
      do
         field=$(clean_fieldname "cpu_${cpu_usage}_${guest_name}")
         cat <<EOF
${field}.label $guest_name: $(title_case "$cpu_usage")
${field}.type DERIVE
${field}.min 0
EOF
      done
   done

   # NOTE(review): do_fetch delivers "usage_usec" for this graph, the label
   # "milisec" looks inconsistent with that - confirm before changing it.
   cat <<EOF

multigraph lxc_cpu_time
graph_title CPU time
graph_args -l 0 --base 1000
graph_vlabel milisec
graph_category virtualization
EOF

   for guest_name in $active_guests
   do
      field=$(clean_fieldname "cpu_time_${guest_name}")
      cat <<EOF
${field}.label $guest_name
${field}.type DERIVE
${field}.min 0
EOF
   done

   # upper limit of the percentage graph: 100% for every cpu listed in /proc/stat
   NCPU=$(grep -cE '^cpu[0-9]+ ' /proc/stat)
   graphlimit=$((NCPU * 100))

   cat <<EOF

multigraph lxc_cpu_usage
graph_title CPU Usage (%)
graph_args -l 0 -u $graphlimit -r
graph_vlabel %
graph_scale no
graph_category virtualization
EOF

   for guest_name in $active_guests
   do
      field=$(clean_fieldname "cpu_usage_${guest_name}")
      cat <<EOF
${field}.label $guest_name
${field}.min 0
${field}.draw $cpu_draw_style
EOF
   done

   cat <<EOF

multigraph lxc_logins
graph_title Logins
graph_args -l 0 --base 1000
graph_vlabel logins
graph_category virtualization
graph_info This graph shows currently logged in users.
EOF

   for guest_name in $active_guests
   do
      field=$(clean_fieldname "logins_${guest_name}")
      cat <<EOF
${field}.label $guest_name
${field}.min 0
${field}.type GAUGE
EOF
   done

   cat <<EOF

multigraph lxc_net
graph_title Network traffic
graph_args --base 1000
graph_vlabel bits in (-) / out (+) per \${graph_period}
graph_category virtualization
graph_info This graph shows the traffic of active LXC containers.
EOF

   for guest_name in $active_guests
   do
      # Use the first "Link:" entry reported by lxc-info: several links would
      # otherwise end up as one multi-line "device" string.
      device=$(lxc-info -n "$guest_name" | awk '$1 == "Link:" { print $2; exit }')
      if [ -z "$device" ]; then
         continue
      fi
      field_down=$(clean_fieldname "net_${guest_name}_down")
      field_up=$(clean_fieldname "net_${guest_name}_up")
      cat <<EOF
${field_down}.label $guest_name
${field_down}.type DERIVE
${field_down}.graph no
${field_down}.cdef ${field_down},8,*
${field_down}.min 0
${field_up}.label $guest_name
${field_up}.type DERIVE
${field_up}.negative ${field_down}
${field_up}.cdef ${field_up},8,*
${field_up}.min 0
EOF
      # Reading the speed may fail even though the file is readable (the read
      # is rejected for interfaces that do not report a speed). This must not
      # abort the plugin under "set -e", thus the read is part of the
      # condition and its error message is discarded on purpose.
      if [ -r "/sys/class/net/$device/speed" ] \
            && megabit_per_second=$(cat "/sys/class/net/$device/speed" 2>/dev/null); then
         case "$megabit_per_second" in
            ''|0|*[!0-9]*)
               # empty, zero or not a plain number (e.g. "-1"): there is no
               # usable speed, do not emit a limit
               ;;
            *)
               bps=$((megabit_per_second * 1000 * 1000))
               cat <<EOF
${field_down}.max $bps
${field_up}.max $bps
EOF
               ;;
         esac
      fi
   done

   cat <<EOF

multigraph lxc_proc
graph_title Processes
graph_args -l 0 --base 1000
graph_vlabel Number of processes
graph_category virtualization
EOF
   for guest_name in $active_guests
   do
      field=$(clean_fieldname "lxc_proc_${guest_name}")
      cat <<EOF
${field}.label $guest_name
${field}.type GAUGE
${field}.min 0
EOF
   done

   cat <<EOF

multigraph lxc_task
graph_title Tasks
graph_args -l 0 --base 1000
graph_vlabel Number of tasks
graph_category virtualization
EOF
   for guest_name in $active_guests
   do
      field=$(clean_fieldname "lxc_task_${guest_name}")
      cat <<EOF
${field}.label $guest_name
${field}.type GAUGE
${field}.min 0
EOF
   done

   cat <<EOF

multigraph lxc_ram
graph_title Memory
graph_args -l 0 --base 1024
graph_vlabel byte
graph_category virtualization
EOF

   for guest_name in $active_guests
   do
      field=$(clean_fieldname "mem_usage_${guest_name}")
      cat <<EOF
${field}.label $guest_name
${field}.type GAUGE
${field}.draw $ram_draw_style
EOF
   done
}


# Print the value of a single key of the "cpu.stat" cgroup file of a guest.
# Arguments: $1 - guest name
#            $2 - exact name of the key (e.g. "user_usec")
# Outputs:   the value, or "U" (unknown) if it could not be determined
_lxc_cpu_stat_value() {
   local guest_name="$1"
   local key="$2"
   local value
   # Match the key exactly (first column): a substring match could also pick
   # up unrelated lines and would then deliver several values at once.
   value=$(get_lxc_cgroup_info "$guest_name" "cpu.stat" \
      | awk -v key="$key" '$1 == key { print $2; exit }') || value=""
   echo "${value:-U}"
}


# Print the current values of all graphs ("fetch" action).
#
# Globals:   exclude (read)
# Outputs:   one "multigraph" section per graph on stdout with one
#            "<field>.value <value>" line per active guest; values that could
#            not be determined are reported as "U" (unknown)
do_fetch() {
   local active_guests guest_name cpu_usage cpu_usage_value device
   local value value_up value_down systemd_cgtop
   active_guests=$(get_active_guests "$exclude")
   # The percentage is only shown after multiple iterations of systemd-cgtop.
   # A failing systemd-cgtop must not abort all other graphs under "set -e":
   # the cpu usage is then reported as unknown.
   systemd_cgtop=$(systemd-cgtop -b -n4 -d 250ms) || systemd_cgtop=""

   echo "multigraph lxc_cpu"
   for guest_name in $active_guests
   do
      for cpu_usage in user system
      do
         echo "$(clean_fieldname "cpu_${cpu_usage}_${guest_name}").value $(_lxc_cpu_stat_value "$guest_name" "${cpu_usage}_usec")"
      done
   done

   echo "multigraph lxc_cpu_time"
   for guest_name in $active_guests
   do
      echo "$(clean_fieldname "cpu_time_${guest_name}").value $(_lxc_cpu_stat_value "$guest_name" "usage_usec")"
   done

   echo "multigraph lxc_cpu_usage"
   for guest_name in $active_guests
   do
      # Pick the third column of the last line of this guest that carries a
      # number (the column is not numeric in the first iterations). The
      # guest name is matched literally (-F), not as a regular expression.
      cpu_usage_value=$(printf '%s\n' "$systemd_cgtop" \
         | grep -F -- "lxc.payload.$guest_name " \
         | awk '$3 ~ /^[0-9]/ { value = $3 } END { if (value != "") print value }')
      echo "$(clean_fieldname "cpu_usage_${guest_name}").value ${cpu_usage_value:-U}"
   done

   echo "multigraph lxc_logins"
   for guest_name in $active_guests
   do
      echo "$(clean_fieldname "logins_${guest_name}").value $(lxc-attach -n "$guest_name" users | wc -w)"
   done

   echo "multigraph lxc_net"
   for guest_name in $active_guests
   do
      # Use the first "Link:" entry reported by lxc-info: several links would
      # otherwise end up as one multi-line "device" string.
      device=$(lxc-info -n "$guest_name" | awk '$1 == "Link:" { print $2; exit }')
      value_up=""
      value_down=""
      if [ -n "$device" ]; then
         # the interface name is compared literally with the first column
         value_up=$(awk -v dev="$device:" '$1 == dev { print $10; exit }' /proc/net/dev) || value_up=""
         value_down=$(awk -v dev="$device:" '$1 == dev { print $2; exit }' /proc/net/dev) || value_down=""
      fi

      cat <<EOF
$(clean_fieldname "net_${guest_name}_up").value ${value_up:-U}
$(clean_fieldname "net_${guest_name}_down").value ${value_down:-U}
EOF
   done

   echo "multigraph lxc_proc"
   for guest_name in $active_guests
   do
      value=$(lxc_count_processes "$guest_name") || value=""
      echo "$(clean_fieldname "lxc_proc_${guest_name}").value ${value:-U}"
   done

   echo "multigraph lxc_task"
   for guest_name in $active_guests
   do
      value=$(cat "/sys/fs/cgroup/lxc.payload.$guest_name/pids.current") || value=""
      echo "$(clean_fieldname "lxc_task_${guest_name}").value ${value:-U}"
   done

   echo "multigraph lxc_ram"
   for guest_name in $active_guests
   do
      value=$(get_lxc_cgroup_info "$guest_name" "memory.current") || value=""
      echo "$(clean_fieldname "mem_usage_${guest_name}").value ${value:-U}"
   done
}


# Run the action requested via the first command line argument. Every known
# action ends the script with exit code 0, an unknown action with exit code 1.
case "${1:-}" in
   autoconf)
      do_autoconf
      ;;
   config)
      do_config
      # if MUNIN_CAP_DIRTYCONFIG is 1, the values are emitted right after the
      # configuration
      if [ "${MUNIN_CAP_DIRTYCONFIG:-0}" = 1 ]; then
         do_fetch
      fi
      ;;
   "")
      do_fetch
      ;;
   *)
      echo >&2 "Invalid action requested (none of: autoconf / config / '')"
      exit 1
      ;;
esac

exit 0

