#!/usr/local/bin/bash

# Copyright (c) 2015-2020 Oliver Mahmoudi
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted providing that the following conditions 
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

# mapdir - A utility to map files and directory hierarchies

# Functions:
# readfile()
# check_excludes()
# process_new_dir()
# log_entry()
# get_file_stats()
# get_filename()
# pretty_output()
# check_savefile_existence()
# usage()

# Global state shared by all functions:
READLINK=''					# canonical path of the file/directory to map
CHECKSUM=''					# checksum utility to run on regular files
DATE=$(date '+%m%d%Y')		# today's date, used in the default savefile name
DEPTH=0						# depth handed to the first readfile() call
SAVEFILE=''					# file that receives a copy of the output
STRLEN=''					# scratch variable for the statistics banner
EXCLUDES=''					# whitespace separated list of excluded paths

# Counters that feed the statistics printed at the end of the program.
DIRS=0 UNREADABLE_DIRS=0
FILES=0 UNREADABLE_FILES=0
BLOCK_SPECIALS=0 CHARACTER_SPECIALS=0
PIPES=0 SOCKETS=0
SYMBOLIC_LINKS=0
UNKNOWN=0
TOTAL_FILES=0

# Remember the current Internal Field Separator and switch to newline plus
# backspace, so that unquoted expansions are not split on blanks.
SAVEIFS=$IFS
IFS=$'\n\b'
LC_COLLATE=C
export LC_COLLATE

# Flags and arguments filled in by getopts:
d_flag=0					# -d: dotglob flag
e_flag=0 e_list=''			# -e: exclude files flag and the list of excludes
f_flag=0					# -f: omit startdir flag
h_flag=0					# -h: use sha256 flag
n_flag=0					# -n: no savefile flag
p_flag=0					# -p: display output as directory tree
s_flag=0 s_file=''			# -s: alternate savefile flag and the savefile itself
t_flag=0					# -t: print statistics flag
x_flag=0 x_file=''			# -x: excludes from file flag and the excludes file

#
# Helper for readfile(): report one non-directory entry in the style that
# the -f and -p flags ask for.
#
# Arguments:
#   $1 - logging function to call (log_entry or get_file_stats)
#   $2 - path of the entry
#   $3 - second argument for the logging function (file type description
#        for log_entry, "readable"/"unreadable" for get_file_stats)
#   $4 - "middleentry" or "lastentry", only used by pretty_output()
#   $5 - binary string of the ancestors, only used by pretty_output()
#
_readfile_report()
{
	if [ "$f_flag" -eq 0 ]; then
		"$1" "$2" "$3" fullpath
	elif [ "$p_flag" -eq 0 ]; then
		"$1" "$2" "$3" filename_only
	else
		pretty_output "$2" "$4" "$5"
	fi
}

#
# This is the main function. It classifies a single path, updates the
# statistics counters and hands the entry to the matching output function.
# Directories are additionally descended into via process_new_dir().
#
# Arguments:
#   $1 - path of the entry to examine
#   $2 - 1 on the initial call made from the command line, 0 on the calls
#        made by process_new_dir(); with -f (and without -p) a directory is
#        only logged when this is 0
#   $3 - depth, handed to process_new_dir() in -p mode
#   $4 - "middleentry" or "lastentry", handed to pretty_output()
#   $5 - entry count supplied by process_new_dir() (stored, not used here)
#   $6 - binary string describing the ancestors, handed to pretty_output()
#
# Note: the -d, -b and -c tests follow symbolic links and are evaluated
# before -L, so a link to a directory or device is classified by its target.
#
readfile()
{
	local _file _toplevel _depth _psymbol _pnoes _pbinstring

	_file=$1
	_toplevel=$2
	_depth=$3
	_psymbol=$4
	_pnoes=$5
	_pbinstring=$6

	# Continue? Excluded entries are neither counted nor logged.
	if ! check_excludes "$_file"; then
		return 0
	fi

	# Every entry that gets this far is counted exactly once. This has to
	# happen before any output, because pretty_output() inspects the counter.
	TOTAL_FILES=$((TOTAL_FILES+1))

	if [ -d "$_file" ]; then
		if [ -r "$_file" ] && [ -x "$_file" ]; then
			DIRS=$((DIRS+1))
			if [ "$f_flag" -eq 0 ]; then
				log_entry "$_file" directory fullpath
				process_new_dir "$_file"
			elif [ "$p_flag" -eq 1 ]; then
				pretty_output "$_file" "$_psymbol" "$_pbinstring"
				process_new_dir "$_file" "$_depth"
			elif [ "$_toplevel" -eq 0 ]; then
				log_entry "$_file" directory filename_only
				process_new_dir "$_file"
			elif [ "$_toplevel" -eq 1 ]; then
				# The start directory itself is not logged with -f.
				process_new_dir "$_file"
			fi
		else
			UNREADABLE_DIRS=$((UNREADABLE_DIRS+1))
			if [ "$f_flag" -eq 0 ]; then
				log_entry "$_file" "directory is not readable" fullpath
			elif [ "$p_flag" -eq 1 ]; then
				pretty_output "$_file" "$_psymbol" "$_pbinstring"
			elif [ "$_toplevel" -eq 0 ]; then
				log_entry "$_file" "directory is not readable" filename_only
			fi
		fi
	elif [ -b "$_file" ]; then
		BLOCK_SPECIALS=$((BLOCK_SPECIALS+1))
		_readfile_report log_entry "$_file" "block special file" "$_psymbol" "$_pbinstring"
	elif [ -c "$_file" ]; then
		CHARACTER_SPECIALS=$((CHARACTER_SPECIALS+1))
		_readfile_report log_entry "$_file" "character special file" "$_psymbol" "$_pbinstring"
	elif [ -L "$_file" ]; then
		SYMBOLIC_LINKS=$((SYMBOLIC_LINKS+1))
		_readfile_report log_entry "$_file" "symbolic link" "$_psymbol" "$_pbinstring"
	elif [ -f "$_file" ]; then
		if [ -r "$_file" ]; then				# it's a readable file
			FILES=$((FILES+1))
			_readfile_report get_file_stats "$_file" readable "$_psymbol" "$_pbinstring"
		else									# unreadable file
			UNREADABLE_FILES=$((UNREADABLE_FILES+1))
			_readfile_report get_file_stats "$_file" unreadable "$_psymbol" "$_pbinstring"
		fi
	elif [ -p "$_file" ]; then
		PIPES=$((PIPES+1))
		_readfile_report log_entry "$_file" pipe "$_psymbol" "$_pbinstring"
	elif [ -S "$_file" ]; then
		SOCKETS=$((SOCKETS+1))
		_readfile_report log_entry "$_file" socket "$_psymbol" "$_pbinstring"
	else
		UNKNOWN=$((UNKNOWN+1))
		_readfile_report log_entry "$_file" "unknown file type" "$_psymbol" "$_pbinstring"
	fi
}

#
# Check for files that are to be excluded from the search process. Passed via -e and/or -x.
#
# Arguments:
#   $1 - path to look up in $EXCLUDES
# Returns:
#   1 if the path is on the exclude list, 0 otherwise
#
check_excludes()
{
	local _file_to_check _excluded
	# We need the "standard" IFS to parse the list. Declaring IFS local
	# restores the caller's value automatically on every return path.
	local IFS=$SAVEIFS

	_file_to_check=$1

	# $EXCLUDES is left unquoted on purpose: it is a whitespace separated
	# list, so it has to undergo word splitting (and pathname expansion).
	for _excluded in $EXCLUDES ; do
		if [ "$_file_to_check" = "$_excluded" ] ; then
			return 1
		fi
	done

	return 0
}

#
# In case we encounter a new directory, this function gets called.
# It checks whether or not the new directory has contents and calls
# readfile() for every entry found in it.
#
# Arguments:
#   $1 - path of the directory to descend into
#   $2 - depth of that directory (only passed in -p mode)
#
# Side effect: the working directory of the shell is changed.
#
process_new_dir()
{
	local _dir _depth _entry _left _pdir _pnoes _psstring
	local -a _contents _pcontents

	_dir=$1
	_depth=$2

	# For the pretty_output function, first examine all the parent directories,
	# to find out what type of entries we have and set binary flags accordingly:
	# "0" if the directory is the last entry of its parent, "1" otherwise.
	if [ "$p_flag" -eq 1 ] && [ "${_depth:-0}" -ge 1 ]; then
		_pdir=$_dir
		while [ "$_depth" -ge 0 ]; do
			# Step into the parent; stop climbing if that is not possible
			# instead of inspecting an unrelated directory.
			if ! cd "$_pdir" || ! cd ..; then
				break
			fi
			_pcontents=(*)
			_pnoes=${#_pcontents[@]}
			_pdir=$(get_filename "$_pdir")

			if [ "$_pnoes" -gt 0 ] && [ "${_pcontents[_pnoes-1]}" = "$_pdir" ]; then
				_psstring="0$_psstring"
			else
				_psstring="1$_psstring"
			fi

			_pdir=$(pwd)
			_depth=$((_depth-1))
		done
	fi

	# Now process the directory that actually got passed to the function.
	_depth=$2
	cd "$_dir" || return
	_contents=(*)
	_left=${#_contents[@]}

	if [ "$_left" -ne 0 ]; then
		_depth=$((_depth+1))

		# _left counts the entries still to be handled, so the final one
		# can be announced as "lastentry".
		for _entry in "${_contents[@]}" ; do
			if [ "$_left" -eq 1 ]; then
				readfile "$_dir/$_entry" 0 "$_depth" lastentry "$_pnoes" "$_psstring"
			else
				readfile "$_dir/$_entry" 0 "$_depth" middleentry "$_pnoes" "$_psstring"
			fi
			_left=$((_left-1))
		done
	fi
}

#
# Log filetype to stdout and to SAVEFILE if desired. This function logs all
# filetypes except for regular files, which need a little more handling based
# on OS and accessibility.
#
# Arguments:
#   $1 - path of the entry
#   $2 - description of the file type
#   $3 - "fullpath" to log $1 as is, "filename_only" to log only its last
#        path component
#
log_entry()
{
	local _entry _filetype _logtype _line

	_filetype=$2
	_logtype=$3

	case "$_logtype" in
		fullpath)
			_entry=$1
			;;
		filename_only)
			_entry=$(get_filename "$1")
			;;
	esac

	# Build the line once and print it quoted, so that names containing
	# glob characters are logged literally.
	_line="$_entry - $_filetype"
	printf '%s\n' "$_line"
	if [ "$n_flag" -eq 0 ]; then
		printf '%s\n' "$_line" >> "$SAVEFILE"
	fi
}

#
# This function can be considered to be the log_entry function for regular files.
# It logs the name, the size in bytes and the checksum of a regular file to
# stdout and, unless -n was given, to SAVEFILE.
#
# Arguments:
#   $1 - path of the regular file
#   $2 - "readable" to compute the checksum with $CHECKSUM, "unreadable" to
#        log "not readable" in its place
#   $3 - "fullpath" to log $1 as is, "filename_only" to log only its last
#        path component
#
# Any other value for $2 or $3 logs nothing.
#
get_file_stats()
{
	local _file _name _size _sum _line

	_file=$1

	case "$3" in
		fullpath)
			_name=$_file
			;;
		filename_only)
			_name=$(get_filename "$_file")
			;;
		*)
			return 0
			;;
	esac

	case "$2" in
		readable)
			# The checksum is the last field of the "ALGO (file) = hash"
			# line; taking the last field instead of a fixed column keeps
			# this correct for file names that contain blanks.
			_sum=$("$CHECKSUM" "$_file" | awk '{ print $NF }')
			;;
		unreadable)
			_sum="not readable"
			;;
		*)
			return 0
			;;
	esac

	# The size is the fifth column of the long listing.
	_size=$(ls -l -- "$_file" | awk '{ print $5 }')

	_line="$_name - regular file - Size: $_size bytes - ${CHECKSUM}: $_sum"
	printf '%s\n' "$_line"
	if [ "$n_flag" -eq 0 ]; then
		printf '%s\n' "$_line" >> "$SAVEFILE"
	fi
}

#
# When invoking mapdir with the -f switch, we only log the filename.
#
# Arguments:
#   $1 - a path
# Outputs:
#   the part of $1 to the right of its last "/" (all of $1 if it contains
#   no "/", an empty line if it ends in "/")
#
get_filename()
{
	# Strip the longest prefix ending in "/". Doing this in the shell keeps
	# backslashes and glob characters in the name intact and avoids
	# starting a process for every entry.
	printf '%s\n' "${1##*/}"
}

#
# Print the structure of the argument passed to mapdir to stdout as a pretty output tree.
# Invoked with the -p switch. Needs -f to work.
#
# Arguments:
#   $1 - path of the entry; only its last component is printed
#   $2 - "middleentry" or "lastentry", selects the branch symbol
#   $3 - binary string with one digit per ancestor level: "0" prints a
#        blank, "1" prints a vertical line; the first digit is ignored
#
# The very first entry of a run (TOTAL_FILES equal to 1) is printed as the
# root of the tree. The line is also appended to SAVEFILE unless -n was given.
#
pretty_output()
{
	local _entry _space _symboltype _binstring _str _line _pos
	local _box_rh _box_hl _box_vl _box_mh

	# http://jrgraphix.net/r/Unicode/2500-257F
	_box_rh=$'\u2514'		# └
	_box_hl=$'\u2500'		# ─
	_box_vl=$'\u2502'		# │
	_box_mh=$'\u251C'		# ├

	_entry=$(get_filename "$1")
	_symboltype=$2
	_binstring=$3
	_space="  "
	_str=""

	# Examine the binstring and prepare the middle part of the entry.
	# Position 0 is skipped.
	for (( _pos = 1; _pos < ${#_binstring}; _pos++ )); do
		case "${_binstring:_pos:1}" in
			0)
				_str+=" "
				;;
			1)
				_str+=$_box_vl
				;;
		esac
	done

	# Now prettyprint the entry.
	if [ "$TOTAL_FILES" -eq 1 ]; then
		_line="$_box_hl$_box_hl$_entry"
	elif [ "$_symboltype" = "middleentry" ]; then
		_line="$_space$_str$_box_mh$_entry"
	elif [ "$_symboltype" = "lastentry" ]; then
		_line="$_space$_str$_box_rh$_entry"
	else
		return 0
	fi

	printf '%s\n' "$_line"
	if [ "$n_flag" -eq 0 ]; then
		printf '%s\n' "$_line" >> "$SAVEFILE"
	fi
}

#
# Ask for confirmation before an existing savefile gets overwritten.
#
# Arguments:
#   $1 - path of the savefile
# Returns:
#   0 if the file does not exist or the user answered yes. Any other
#   answer, or no answer within 30 seconds, terminates the script with
#   exit status 1.
#
check_savefile_existence()
{
	local choice

	# Nothing to confirm as long as the savefile does not exist.
	# The path is quoted so that it is tested literally.
	if [ ! -e "$1" ] ; then
		return 0
	fi

	printf '%s' "The savefile: $1 already exists. Do you want to overwrite it Yes/No? "
	read -r -t 30 choice		# We got 30 seconds to make a choice

	case "$choice" in

	[Yy][Ee][Ss] | [Yy] )
		return 0
		;;
	[Nn][Oo] | [Nn] )
		echo 'Aborted!'
		exit 1
		;;
	*)
		echo "No input received. Terminating."
		exit 1
		;;
	esac
}

# Print the synopsis of mapdir to stdout and terminate with exit status 1.
usage()
{
	local _synopsis

	_synopsis="usage: mapdir [-dfhnpt] [-e excludes] [-s savefile]"
	_synopsis="$_synopsis [-x excludes_file] [file]||[directory]"

	echo "$_synopsis"
	exit 1
}

### Point of entry ###
# Parse the command line. The leading ":" of the option string puts getopts
# into silent mode: an unknown option is reported as "?" and an option that
# lacks its argument as ":", in both cases with the option letter in OPTARG.
while getopts ":de:fhnps:tx:" opt ; do
	case "$opt" in
		d)
			d_flag=1
			;;
		e)
			e_flag=1
			e_list=$OPTARG
			;;
		f)
			f_flag=1
			;;
		h)
			h_flag=1
			;;
		n)
			n_flag=1
			;;
		p)
			p_flag=1
			;;
		s)
			s_flag=1
			s_file=$OPTARG
			;;
		t)
			t_flag=1
			;;
		x)
			x_flag=1
			x_file=$OPTARG
			;;
		:)
			# -e, -s or -x was given without its argument.
			echo "option -$OPTARG requires an argument."
			usage				# does not return
			;;
		\?)
			echo "unknown flag: -$OPTARG."
			usage				# does not return
			;;
	esac
done

# Drop the parsed options, so that $1 is the file or directory to map.
shift $((OPTIND-1))

# -p requires -f. Usage errors terminate with a non-zero exit status.
if [ "${p_flag:-0}" -eq 1 ] && [ "${f_flag:-0}" -eq 0 ] ; then
	echo "The -p option can only be used with the -f option."
	exit 1
fi

# Allowing (-p && -e) || (-p && -x) would break the formatting of the output tree.
if [ "${p_flag:-0}" -eq 1 ] && { [ "${e_flag:-0}" -eq 1 ] || [ "${x_flag:-0}" -eq 1 ]; } ; then
	echo "The -p option cannot be used with the -e or -x options."
	exit 1
fi

# Process the other options.
# nullglob makes the listing of an empty directory expand to nothing.
# Unless -d was given, dotglob additionally includes hidden entries.
if [ "${d_flag:-0}" -eq 0 ]; then
	shopt -s dotglob nullglob
else
	shopt -s nullglob
fi

if [ "${h_flag:-0}" -eq 1 ]; then		# Change CHECKSUM to sha256
	CHECKSUM=sha256
else
	CHECKSUM=md5
fi

# Collect the excludes given with -e. $e_list is left unquoted on purpose,
# so that it is split (per the current IFS) and expanded like before.
if [ "${e_flag:-0}" -eq 1 ]; then
	for i in $e_list ; do
		EXCLUDES="$i $EXCLUDES"
	done
fi

# Collect the excludes listed in the -x file, one per line. Lines are taken
# literally (no backslash processing), and a final line without a trailing
# newline is not dropped.
if [ "${x_flag:-0}" -eq 1 ]; then
	while IFS= read -r line || [ -n "$line" ]
	do
		EXCLUDES="$line $EXCLUDES"
	done < "$x_file"
fi

#
# If an argument is given, take it, otherwise process the current directory.
# READLINK receives the canonical path of the target. Unless -n was given,
# SAVEFILE is set up in the home directory: either under the name given
# with -s or as mapdir<path with "/" replaced by "_">_<date>.txt.
#
if [ $# -eq 1 ]; then
	READLINK=$(readlink -f -- "$1")
	# An empty result means that the path could not be resolved at all.
	if [ -z "$READLINK" ] || [ ! -e "$READLINK" ]; then
		echo "The file: ${READLINK:-$1} doesn't exist."
		usage					# does not return
	fi
	savefile_stem=$READLINK
else
	READLINK=$(readlink -f ./)
	savefile_stem=$PWD
fi

if [ "$n_flag" -eq 0 ]; then
	if [ "$s_flag" -eq 0 ]; then
		SAVEFILE=~/mapdir${savefile_stem//\//_}_$DATE.txt
	else
		SAVEFILE=~/${s_file}
	fi
	# Ask before overwriting, then start with an empty savefile.
	check_savefile_existence "$SAVEFILE"
	: > "$SAVEFILE"
fi

# When calling the readfile function for the first time, we pass a second argument
# of "1" to it. This serves the purpose of pleasing the diff utility when invoking
# mapdir with the -f switch and having a directory as the first argument. If we
# mapped the starting directory to the $SAVEFILE and later on compared it with
# another $SAVEFILE, the diff utility would obviously exit with a return value
# other than 0, even though the contents of the directories may be truly equivalent.
# Consider for example the folders:
#
# /media/filesystem_a and /media/filesystem_b that both have the same content.
#
# The logic is as follows: if the file is a folder, then the readfile function detects
# this in the "is directory" part and skips mapping its occurrence to the $SAVEFILE.
# For subsequent calls to readfile we pass a second argument of "0" to the
# function, which this time maps it.

# Start processing the file/folder. The second argument of "1" marks this as
# the initial call. The path is quoted so that it is passed on literally.
readfile "$READLINK" 1 "$DEPTH"

#
# At this point, we are done parsing. Now print statistics if desired as per the -t flag.
#

# Print the line $1 to stdout and, unless -n was given, append it to SAVEFILE.
_stats_line()
{
	printf '%s\n' "$1"
	if [ "$n_flag" -eq 0 ]; then
		printf '%s\n' "$1" >> "$SAVEFILE"
	fi
}

# Print the counter value $2 under the label $1, but only if it is not zero.
_stats_counter()
{
	if [ "$2" -ne 0 ]; then
		_stats_line "$1: $2"
	fi
}

#
# Print the statistics block: a blank line, a banner naming $READLINK, one
# line per non-zero counter and a closing rule as long as the banner.
# With -f the savefile gets a fixed banner and rule that do not contain
# the path (presumably so that savefiles of equivalent trees stay comparable).
#
print_statistics()
{
	local _header _rule

	_header="########## Statistics for $READLINK ##########"
	# A row of "#" characters, exactly as long as the banner.
	printf -v _rule '%*s' "${#_header}" ''
	_rule=${_rule// /#}

	_stats_line ""

	printf '%s\n' "$_header"
	if [ "$n_flag" -eq 0 ]; then
		if [ "$f_flag" -eq 0 ]; then
			printf '%s\n' "$_header" >> "$SAVEFILE"
		else
			printf '%s\n' "########## Statistics ##########" >> "$SAVEFILE"
		fi
	fi

	_stats_counter "Number of directories" "$DIRS"
	_stats_counter "Number of unreadable directories" "$UNREADABLE_DIRS"
	_stats_counter "Number of regular files" "$FILES"
	_stats_counter "Number of unreadable files" "$UNREADABLE_FILES"
	_stats_counter "Number of block special files" "$BLOCK_SPECIALS"
	_stats_counter "Number of character special files" "$CHARACTER_SPECIALS"
	_stats_counter "Number of pipes" "$PIPES"
	_stats_counter "Number of sockets" "$SOCKETS"
	_stats_counter "Number of symbolic links" "$SYMBOLIC_LINKS"
	_stats_counter "Number of files of unknown type" "$UNKNOWN"
	_stats_counter "Total number of files" "$TOTAL_FILES"

	# Formatted output
	printf '%s\n' "$_rule"
	if [ "$n_flag" -eq 0 ]; then
		if [ "$f_flag" -eq 0 ]; then
			printf '%s\n' "$_rule" >> "$SAVEFILE"
		else
			printf '%s\n' "################################" >> "$SAVEFILE"
		fi
	fi
}

if [ "${t_flag:-0}" -eq 1 ]; then
	print_statistics
fi # t_flag

IFS=$SAVEIFS			# restore the Internal Field Separator from SAVEIFS
exit 0
