#!/bin/bash
#
# cleantool -- Maintenance tool to remove unneeded files and directories
#
# Please see usage information in the HERE document defined below.
#
# Copyright (C) 2010-2016.
#
# $Compile: ./cleantool -h$
# $Maintained at: http://www.visualco.de$
# $Author: Andreas Spindler <info@andreasspindler.de>$
# $Writestamp: 2016-04-20 11:59:20$

########################################################################
# Portability/Environment
#

# Name of this script as shown in messages ('--' guards against a $0 that
# starts with a dash, e.g. when sourced from a login shell).
script=$(basename -- "$0")
version='1.2.21'
# Exported so that find, rm, wc etc. run in the C locale as well; a plain
# assignment only reaches child processes if LC_ALL was already exported.
export LC_ALL=C  # http://unix.stackexchange.com/questions/87745/what-does-lc-all-c-do
VAR_LOG_DIR=/var/log

# Exit codes used by the croak* helpers.
E_MISSING=64                    # Missing programs
E_BADARGS=65                    # Bad argument format
E_BADDIR=66                     # Can't change directory
E_NOTROOT=67                    # Non-root exit error
E_NOTCYGWIN=68                  # Non-Cygwin-shell
E_NOTGNU=69                     # GNU utility required

# under_cygwin is 1 when uname reports a CYGWIN kernel name, else 0.
case $(uname) in
    CYGWIN*) under_cygwin=1;;
    *)       under_cygwin=0;;
esac

# ok [MESSAGE...] -- normal (successful) exit. A non-empty MESSAGE is passed
# to noise as a single string before the script exits with status 0.
function ok    {
    if [ -n "$*" ]; then
        noise "$*"
    fi
    exit 0
}
# noise [WORD...] -- echo the arguments to stdout unless bit 0 of optquiet is
# set or no arguments were given. Returns 0 in both silent cases.
function noise {
    if ((optquiet & 1)); then
        return 0
    fi
    if (($# == 0)); then
        return 0
    fi
    echo "$@"
}
# quack [WORD...] -- verbose-only message. When optverbose is non-zero the
# arguments go through noise with stdout redirected to stderr; otherwise
# nothing is printed and the function returns 1.
function quack {
    ((optverbose)) || return 1
    noise "$@"
} >&2
# panic [MESSAGE...] -- signals runtime error(s) to the shell. Writes a
# two-line report (shell PID, exit status of the command that ran just before
# panic was called, controlling terminal, MESSAGE) to stderr, then exits.
# Bash reports 'exit -1' to the parent as status 255.
function panic {
    local last_status=$?        # must be captured before any other command
    printf 'Panic! in shell %s, pipe %s, terminal %s: %s\n%s exits with -1\n' \
        "$$" "$last_status" "$(tty)" "$*" "$script" >&2
    exit -1
}

# croak [WORD...] -- croaking signals logical errors/warnings: the arguments
# are echoed to stderr. Does not exit.
function croak {
    echo "$@" >&2
}
# croakFind -- warn (via croak) that $find failed. Must be called directly
# after the failing command, because the reported code is the $? seen on entry.
function croakFind {
    local find_status=$?
    croak "WARNING: $find exited with code $find_status"
}
# croakDir DIR... -- report a missing directory via croak and exit with
# $E_BADDIR.
function croakDir {
    local message="$*: No such directory"
    croak "$message"
    exit $E_BADDIR
}
# croakArgs -- print the usage summary and exit with $E_BADARGS.
function croakArgs {
    usage
    exit $E_BADARGS
}
# croakUnlessCygwin -- return quietly when under_cygwin is non-zero; otherwise
# warn via croak and exit with $E_NOTCYGWIN.
function croakUnlessCygwin {
    ((under_cygwin)) && return
    croak "WARNING: Not a Cygwin system. Use '-f' to force deletion of Windows files"
    exit $E_NOTCYGWIN
}
# croakUnlessGnuFind -- return quietly when have_gnu_find is non-zero;
# otherwise warn via croak and exit with $E_NOTGNU.
function croakUnlessGnuFind {
    ((have_gnu_find)) && return
    croak "WARNING: GNU find required"
    exit $E_NOTGNU
}
# croakUnlessRoot -- return quietly when the shell's UID is 0; otherwise warn
# via croak and exit with $E_NOTROOT.
function croakUnlessRoot {
    ((UID == 0)) && return
    croak "WARNING: You must be root to do that."
    exit $E_NOTROOT
}

########################################################################
# Options, Help
#

# version -- print "<script> version <version>" on stderr.
function version {
    printf '%s version %s\n' "$script" "$version" >&2
}

# usage -- print the version line (via version) followed by the two-line
# synopsis, framed by blank lines, on stderr.
function usage {
    version
    printf '\nUsage:\n    %s [-tgwieEMCY] [-L NUM] [-la] [-fnv] [DIRNAME]\n    %s [-H | -V]\n\n' \
        "$script" "$script" >&2
}

# help -- print the usage summary (via usage) followed by the full manual on
# stderr. The here-document is unquoted, so $script, $VAR_LOG_DIR and the E_*
# exit codes are expanded at call time; literal dollar signs are written \$.
function help {
    usage; cat <<HERE >&2
Description:

    SAFELY  find  and/or  remove   junk  files,  compiler  intermediate  files,
    suspicious files, "core" files,  empty files/directories and symbolic links
    that   point  to   nothing.  Handles   dot  files/directories   and  source
    repositories  (e.g.  ".svn")  carefully.  Works on  all  UNIXen,  including
    Windows/Cygwin.

    The script will run in dry  mode (where nothing is actually removed) unless
    one of the following options is given explicitly:

        -t -g -w -i -e -E -a
        -M -L

    The following options never remove files:

        -C -Y

General Options:

    -n      Dry run: just print what would be removed.
    -l      Short for "-an" (list files that could be removed).
    -f      Force harder.
    -q N    Enable quiet mode (0:normal, 1:no status, 2:no paths, 3:both).
    -v      Enable verbose mode.
    -V      Print version information.
    -H      Print this information.

Removal Options:

    -t      Remove temporary files:
                *~      #*#     .tmp    .temp   .cache  NUL     TAGS
            In conjunction with -f additionally:
                .log    .lock   .bak

    -g      Remove intermediate files left behind by GNU tools:
                .d      .o      .gch    a.out
            In conjunction with -f additionally:
                .elc

    -w      Remove intermediate files left behind by Windows programs, Cygwin
            tools and (in particular) Microsoft Visual Studio:
                .cdf    .cache  .pgc    .pgd    .rep    .xdc    .bi
                .tlog   .dep    .idb    .suo    .ilk    .rsp    .ncb
                .sbr    .pch    .bsc    .clw    .obj    .aps    .exp
                .tlb    .tli    .tlh    .crf    .sdf    .opensdf
                .intermediate.manifest
                BuildLog.htm    MSVC.BND
                .unsuccessfulbuild      .lastbuildstate
                ipch/*
            In conjunction with -f additionally:
                .opt    .pdb    .map    .res
                .meta   .metagen
                .manifest
                *_manifest.rc

    -i      Remove files left behind by image processors such as exiftool
            and XnView:
                pspbrwse.jbf    thumbs.db           ExifBrowser.Thumbnails
            In conjunction with -f additionally:
                *.*_original    *.*_exiftool_tmp    *.xnbak.*

    -a      Short for "-tigw".

    -e, -0  Empty files (zero file size).

    -E      Empty directories.

System Maintenance Options:

    -M      Maintainer clean:
                .emacs.desktop.*        *.vcproj.*.user         *.vcxproj.user
            WARNING! -Mf unties CVS/Subversion/SourceSafe working copies:
                .svn/           .cvs/
                vssver2.scc     mssccprj.scc    *.vspscc

    -L NUM  Remove "$VAR_LOG_DIR/wtmp" and tailor "$VAR_LOG_DIR/messages" to NUM
            lines (minimum 100 or use -f to allow NUM to be less than 100 lines).
            ONLY ROOT CAN DO THIS.

System Maintenance Options (print only, never removed):

    -C      Find all "core"-files which are at least 5 days old.
    -Y      Find suspicious (fishy) files:
                - world writable files
                - files with no valid owner and/or group
                - SetUID files
                - files with unusual permissions, sizes, names, dates
                - symbolic links that point to nothing

Examples:

    Print a list of removable files:
        \$ $script
    As before, but additionally with empty files/directories:
        \$ $script -l
    As before, but print all removable files, i.e. including those that one
    might not discard thoughtlessly:
        \$ $script -lf
    Print a list of empty files and directories:
        \$ $script -eEn
    Untie CVS/Subversion/SourceSafe working copy:
        \$ $script -Mf

Installation/Prerequisites:

Portable shell script. Works on any UN*X and Windows (Cygwin) system. Copy this
script  into your  path (e.g.  to "/usr/local/bin"  or  "\$HOME/bin"). Requires
uname, getopts, sh and find. Maintained at <http://www.visualco.de>.

Exit codes:

     0  Indicates success to the shell.
    -1  Indicates an unexpected error.
    $E_BADARGS  Bad command-line arguments.
    $E_BADDIR  "-L" could not change to "$VAR_LOG_DIR".
    $E_NOTROOT  Root user required to perform.
    $E_NOTCYGWIN  "-w" requires a Cygwin-driven shell (use "-f")
    $E_NOTGNU  GNU utility required

HERE
}

# Option state. All variables are 0/1 flags except optquiet (bit mask: 1 = no
# status lines, 2 = no paths), optloglines (line count for -L) and optdir
# (search directory).
optdry=1 optforcedry=0 optverbose=0 optquiet=0 optforce=0 opttemp=0 optgnujunk=0
optemptyfiles=0 optemptydirs=0 optmsjunk=0 optpixeljunk=0 optcores=0 optfishy=0
optloglines=0 optmaintainerclean=0 optdir=

# loadDefaultOpts -- select the default removal categories: temporary files,
# GNU junk and image-program junk everywhere, Windows junk only under Cygwin
# (elsewhere -w without -f is rejected further below). Leaves optdry alone.
function loadDefaultOpts {
    opttemp=1 optgnujunk=1 optpixeljunk=1
    if ((under_cygwin)); then
        optmsjunk=1
    fi
}

# 'A' is accepted by the option string but has no handler, so it ends up in
# croakArgs like any unknown option.
while getopts 'aAl0eEtigwCYL:Mfnq:vHV' opt
do
    case $opt in
        a) loadDefaultOpts; optdry=0;;
        l) loadDefaultOpts; optforcedry=1;;
        0|e) optemptyfiles=1; optdry=0;;
        E) optemptydirs=1; optdry=0;;
        t) opttemp=1; optdry=0;;
        g) optgnujunk=1; optdry=0;;
        i) optpixeljunk=1; optdry=0;;
        w) optmsjunk=1; optdry=0;;
        C) optcores=1; optdry=1;;
        Y) optfishy=1; optdry=1;;
        M) optmaintainerclean=1; optdry=0;;
        L) # Must be a plain decimal number; 10# keeps "08" from being read
           # as an invalid octal constant in later arithmetic.
           [[ $OPTARG =~ ^[0-9]+$ ]] || croakArgs
           optloglines=$((10#$OPTARG)); optdry=0;;
        f) optforce=1;;
        n) optforcedry=1;;
        q) [[ $OPTARG =~ ^[0-9]+$ ]] || croakArgs
           optquiet=$((10#$OPTARG));;
        v) optverbose=1;;
        V) version; ok;;
        H) help; ok;;
        *) croakArgs;;
    esac
done
shift $((OPTIND - 1))
optdir=${1:-.}
if [ -n "$2" ]; then
    croak "$script: Only one search directory allowed"
    exit $E_BADDIR
fi

# No category selected at all: fall back to a dry run over the defaults.
if ! ((optemptyfiles + optemptydirs + opttemp + optgnujunk + optpixeljunk +
       optmsjunk + optcores + optfishy + optmaintainerclean + optloglines)); then
    optdry=1
    loadDefaultOpts
fi
((optforcedry)) && optdry=1
((under_cygwin)) && {
    optdir=$(cygpath "$optdir")
    quack "$optdir: absolute directory '$(cygpath -a "$optdir")'"
}

# Scanning "/" needs root (except under Cygwin); the directory must exist.
[ "$optdir" != "/" ] || ((under_cygwin)) || croakUnlessRoot
[ -d "$optdir" ]                         || croakDir "$optdir"

((optmsjunk && optforce==0)) && croakUnlessCygwin # -w requires -f when not under Cygwin

########################################################################
# Test find utility
#
# Test whether 'find -regextype' works. Under Cygwin, a few native Windows
# tools (namely find.exe, link.exe and sort.exe) can shadow the Cygwin ones.
#
# Locate find. Only under Cygwin can the first 'find' in PATH be the native
# Windows find.exe, which does not understand --version; in that case fall
# back to /usr/bin/find and carry on with it.
find=$(command -v find)
if ((under_cygwin)); then
    if [ -z "$find" ] || ! "$find" --version >/dev/null 2>&1; then
        cat <<HERE >&2
WARNING: find is '$find', the native Windows find utility. Possibly the
         Cygwin-bin-directory does not come first in PATH.
HERE
        find=/usr/bin/find
        "$find" --version >/dev/null 2>&1 || panic "find not found"
        cat <<HERE >&2
WARNING: Using '$find' explicitly.
HERE
    fi
fi
[ -n "$find" ] || panic "find not found"

# Probe for GNU find: only GNU find accepts '-regextype posix-egrep'. The
# probe runs on the current directory and its output is discarded.
if "$find" -regextype posix-egrep -prune >/dev/null 2>&1; then
    have_gnu_find=1
else                            # some prehistoric find version?
    cat <<HERE >&2;
WARNING: '$find' is not GNU find, since it does not understand '-regextype posix-egrep'
HERE
    have_gnu_find=0
fi

# Expression fragments later spliced (unquoted, word-split) into the find
# command lines.

# GNU find gets extended regular expressions; other finds get nothing.
regex_expr=''
((have_gnu_find)) && regex_expr='-regextype posix-egrep'

# Bit 1 of optquiet suppresses path output, hence no -v for rm/rmdir.
case $((optquiet & 2)) in
    0)
        remove_file_expr='-exec rm -fv {} ;'
        remove_dir_expr='-exec rm -rfv {} ;'
        remove_empty_dir_expr='-exec rmdir -v {} ;'
        ;;
    *)
        remove_file_expr='-exec rm -f {} ;'
        remove_dir_expr='-exec rm -rf {} ;'
        remove_empty_dir_expr='-exec rmdir {} ;'
        ;;
esac

# "what" prefixes the status lines; default_execute_expr is the action used
# when a section does not pass its own.
if ((optdry)); then
    what="$optdir: finding"
    default_execute_expr="-print"
    ((optquiet & 2)) && default_execute_expr=""
else
    what="$optdir: removing"
    default_execute_expr="$remove_file_expr"
fi

# taboo_expr prunes some root directories. Under Windows/Cygwin assume
# "/windows-X-drive" is the Windows partition on a dual-boot computer.
#
# The value is a flat list of find tokens that is expanded unquoted, so only
# the token sequence matters, not the whitespace between tokens. The patterns
# contain glob characters: expand the variable with 'set -f' in effect.

taboo_expr="-path /proc -prune"
taboo_expr+=" -o -path /sys -prune"
taboo_expr+=" -o -path /dev -prune"
taboo_expr+=" -o -iwholename /windows-*-drive -prune"
if ((under_cygwin)); then
    taboo_expr+=" -o -iwholename /cygdrive/[a-z]/System?Volume?Information -prune"
    taboo_expr+=" -o -path /cygdrive/[a-z]/\$RECYCLE.BIN/* -prune"
    taboo_expr+=" -o -path /cygdrive -prune"
fi

########################################################################
# Base functions running find.
#

# doFindImpl DOTFILES DOTDIRS [ACTION]
#
# Runs $find on $optdir with $find_opts, $regex_expr, the $taboo_expr prunes
# and the caller-prepared $conditional_expr.
#
#   DOTFILES  non-zero: names of files beginning with a dot may match
#   DOTDIRS   non-zero: directories beginning with a dot are not skipped
#   ACTION    find action applied to matches; when empty, "-exec true ;"
#
# Globbing is switched off for the duration so that the patterns inside the
# unquoted expression variables reach find untouched. With optverbose set the
# find command is traced via 'set -x'. A failing find only produces a warning
# (croakFind). Both noglob and xtrace are switched off on return.
doFindImpl() {
    set -f || panic # disable file pattern expansion; note that the '~'
                    # filename metacharacter is also disabled by this option
    local with_dotfiles=${1:-0} with_dotdirs=${2:-0} action="$3"
    local skip_expr= notice=

    if [[ -z $action ]]; then
        action="-a -exec true ;"
    else
        action="-a ( $action )"
    fi

    # Pick the "skip" clause placed in front of the conditional expression,
    # plus the warning shown in verbose mode.
    case "$((with_dotfiles != 0))$((with_dotdirs != 0))" in
        11)
            notice="WARNING: finding file- and directory names beginning with a dot"
            ;;
        10)
            notice="WARNING: finding filenames beginning with a dot (but no dot-directories)"
            skip_expr="( -type d -path */.* -prune ) -o"
            ;;
        01)
            notice="WARNING: finding directory names beginning with a dot (but no dot-files)"
            skip_expr="( -type f -name .* ) -o"
            ;;
        00)                     # find no dot files and no directories (default)
            skip_expr="( -path */.* ) -o"
            ;;
    esac

    if ((optverbose)); then
        [[ -n $notice ]] && noise "$notice" >&2
        set -x
    fi
    $find "$optdir" $find_opts $regex_expr                  \
        \( $taboo_expr \) -o                                \
        $skip_expr                                          \
        \( \( $conditional_expr \) $action \) || croakFind
    set +xf
}

# doFind [ACTION] -- run doFindImpl with dot files and dot directories both
# excluded. ACTION defaults to $default_execute_expr.
doFind() {
    local action=${1:-$default_execute_expr}
    doFindImpl 0 0 "$action"
}

# doFindWithDotDirs [ACTION] -- run doFindImpl with dot directories allowed
# but dot files excluded. ACTION defaults to $default_execute_expr.
doFindWithDotDirs() {
    local action=${1:-$default_execute_expr}
    doFindImpl 0 1 "$action"
}

# doFindWithDotFiles [ACTION] -- run doFindImpl with dot files allowed but
# dot directories pruned. ACTION defaults to $default_execute_expr.
doFindWithDotFiles() {
    local action=${1:-$default_execute_expr}
    doFindImpl 1 0 "$action"
}

# doFindWithDots [ACTION] -- run doFindImpl with both dot files and dot
# directories allowed. ACTION defaults to $default_execute_expr.
doFindWithDots() {
    local action=${1:-$default_execute_expr}
    doFindImpl 1 1 "$action"
}

########################################################################
# (0) System files. Truncate $VAR_LOG_DIR/messages to $optloglines (root only).
#
# -L NUM: shorten "$VAR_LOG_DIR/messages" to its last NUM lines and empty
# "$VAR_LOG_DIR/wtmp". In a dry run only the current sizes are reported.
# This section always ends the script (via ok).
if ((${optloglines:-0})); then
    # Without -f never keep fewer than 100 lines.
    if ((!optforce)); then
        [ "$optloglines" -ge 100 ] || optloglines=100
    fi
    if [ -f "$VAR_LOG_DIR/messages" ]; then
        if ((optdry)); then
            # Read via stdin so that wc prints the bare count.
            noise "'$VAR_LOG_DIR/messages' lines: $(wc -l < "$VAR_LOG_DIR/messages")"
            noise "'$VAR_LOG_DIR/messages' words: $(wc -w < "$VAR_LOG_DIR/messages")"
        else
            croakUnlessRoot
            noise "    Truncating '$VAR_LOG_DIR/messages' to $optloglines lines"
            tail -n "$optloglines" "$VAR_LOG_DIR/messages" > "$VAR_LOG_DIR/messages.cleantool" \
                || panic "cannot write '$VAR_LOG_DIR/messages.cleantool'"
            mv -f "$VAR_LOG_DIR/messages.cleantool" "$VAR_LOG_DIR/messages" \
                || panic "cannot replace '$VAR_LOG_DIR/messages'"
        fi
    else
        noise "WARNING: '$VAR_LOG_DIR/messages' not found" >&2
    fi
    if [ -e "$VAR_LOG_DIR/wtmp" ]; then
        if ((optdry)); then
            noise "'$VAR_LOG_DIR/wtmp' lines: $(wc -l < "$VAR_LOG_DIR/wtmp")"
            noise "'$VAR_LOG_DIR/wtmp' words: $(wc -w < "$VAR_LOG_DIR/wtmp")"
        else
            croakUnlessRoot
            noise "    Truncating '$VAR_LOG_DIR/wtmp'"
            # Truncate the log itself, not a file named "wtmp" in the
            # current working directory.
            : > "$VAR_LOG_DIR/wtmp" || panic "cannot truncate '$VAR_LOG_DIR/wtmp'"
        fi
    else
        noise "WARNING: '$VAR_LOG_DIR/wtmp' not found" >&2
    fi
    ok
fi

########################################################################
# (1) Print suspicious files and "core" files (neither is ever removed).
#
# Informational notices: tell a verbose root user that they are root, and
# announce a dry run (quack itself only prints in verbose mode).
if ((optverbose)) && ((UID == 0)); then
    noise "You are root"
fi
if ((optdry)); then
    quack "$optdir: DRY RUN, nothing will be deleted"
fi

if ((optcores)); then
    # -C: find core files (NEVER REMOVED), hence the fixed "finding" wording
    # even when other options have switched the run to removal mode.
    noise "$optdir: finding 'core' files"
    croakUnlessGnuFind
    # Matches "core" and "core.<pid>".
    # NOTE(review): '-atime -5' selects files accessed within the last five
    # days, whereas the help text says "at least 5 days old" -- confirm which
    # one is intended.
    # Globbing is disabled because $taboo_expr carries patterns that must
    # reach find unexpanded.
    set -f
    $find "$optdir" -noleaf                     \
        \( $taboo_expr \) -o                    \
        \( -atime -5 -type f \( -name core -o -name 'core.*' \) -print \) || croakFind
    set +f
fi

if ((optfishy)); then
    if ((!under_cygwin)); then
        # -Y: Print suspicious files (NEVER REMOVED).
        #
        # Adopting a well-known find expression here. World-writable (-perm
        # -2), no symlinks, no sockets and no directories with the sticky/text
        # bit set (symlinks, sockets and directories with the sticky bit set
        # are often world-writable and generally not suspicious.) -noleaf is
        # required for filesystems of mounted CD drives.
        #
        # Globbing is disabled because $taboo_expr carries patterns that must
        # reach find unexpanded.

        noise "$optdir: finding suspicious files"
        croakUnlessGnuFind
        set -f
        $find "$optdir" -noleaf                                                     \
              \( $taboo_expr \) -o                                                  \
              \( -perm -2 ! -type l ! -type s ! \( -type d -perm -1000 \) \) -print \
            || croakFind

        # Find symbolic links that point to nothing (NEVER REMOVED).
        #
        # This is a tip from the Unix Guru Universe (http://www.ugu.com). To
        # find dead symbolic links we let perl determine all links that point
        # to nothing. We may further pipe through "rm -vf" to actually delete
        # these links.

        noise "$optdir: finding dead symbolic links"
        $find "$optdir" \
              \( $taboo_expr \) -o -type l -print | perl -nle '-e || print'
        set +f
    fi
fi

########################################################################
# (2) Maintainer clean
#

if ((optmaintainerclean)); then
    # -M: find/remove editor and IDE per-user state files. Dot files may
    # match, dot directories are pruned (doFindWithDotFiles).
    # NOTE(review): the usage text also promises that -Mf removes .svn/,
    # .cvs/ and SourceSafe files; nothing in this section does that.
    noise "$what '.emacs.desktop.*'"
    conditional_expr="-name .emacs.desktop.*"
    conditional_expr+=" -o -name *.vcproj*.user"
    conditional_expr+=" -o -name *.vcxproj*.user"
    conditional_expr="-type f ( $conditional_expr )"
    doFindWithDotFiles
fi

###############################################################################
# (3) Remove temporary/intermediate/garbage files.
#

if ((opttemp))
then
    # -t: temporary files. The expression is assigned (not appended) so that
    # nothing left in conditional_expr by an earlier section leaks into it.
    noise "$what temporary files"
    if ((have_gnu_find)); then
        conditional_expr="\
             -name *.exe.stackdump \
        -o  -regex ^.+/(#.*#|.*~)$ \
        -o -iregex ^.+/(nul|tags)$ \
        -o -iregex ^.+/.*\\.(tmp|temp|cache)$"
        if ((optforce)); then   # remove harder: *.lock, *.log
            conditional_expr+=" -o -iregex ^.+/.*\\.(lock|log|bak)$"
        fi
    else                        # find has no -regextype
        conditional_expr="\
            -name #*#   -o  -name *~ \
        -o -iname *.cache \
        -o -iname *.exe.stackdump \
        -o -iname nul   -o -iname tags \
        -o -iname *.tmp -o -iname *.temp"
        if ((optforce)); then
            conditional_expr+="\
        -o -name *.log -o -name *.lock -o -name *.bak"
        fi
    fi
    # Allow dot-files to find (e.g. ".log") but prune dot-directories.
    conditional_expr="-type f ( $conditional_expr )"
    doFindWithDotFiles
fi

if ((optgnujunk)); then
    # -g: compiler leftovers. GNU find matches the suffixes with one regex,
    # other finds get one -name test per suffix; -f adds *.elc.
    noise "$what GNU/gcc removable files"
    case $((have_gnu_find != 0)) in
        1) conditional_expr='-name a.out -o -regex ^.+/.*\.(d|o|gch)$' ;;
        *) conditional_expr='-name a.out -o -name *.d -o -name *.o -o -name *.gch' ;;
    esac
    if ((optforce)); then
        conditional_expr+=' -o -name *.elc'
    fi
    conditional_expr="-type f ( $conditional_expr )"
    doFind
fi

if ((optmsjunk)); then
    # -w: Windows/Visual Studio leftovers.
    # See also "Common File Extensions Used by Visual C++",
    # http://support.microsoft.com/kb/132340/EN-US/
    noise "$what Microsoft/Cygwin removable files"
    if ((have_gnu_find)); then
        conditional_expr="\
               -name *.exe.stackdump \
            -o -name BuildLog.htm -o -name MSVC.BND \
            -o -name *.intermediate.manifest \
            -o -name *.lastbuildstate -o -name *.unsuccessfulbuild \
            -o -iregex ^.+/.*\\.(obj|rsp|dep|tlb|tlog|aps|exp|idb|tli|tlh|pgc|pgd|rep|xdc|bi)$ \
            -o -iregex ^.+/.*\\.(sdf|opensdf|ncb|clw|cpl|crf|cdf|suo|sbr|ilk|pch|mdp|pg[dc]|bsc)$ \
            -o -iregex ^.+/.*\\.(cache)$"
    else
        # *.res is deliberately absent here: like in the GNU branch it is
        # only removed together with -f (see below).
        conditional_expr="\
               -name *.exe.stackdump \
            -o -name BuildLog.htm -o -iname MSVC.BND \
            -o -name *.intermediate.manifest \
            -o -name *.lastbuildstate -o -name *.unsuccessfulbuild \
            -o -name *.tlog \
            -o -name *.cdf -o -name *.tli -o -name *.tlh -o -name *.pgc \
            -o -name *.pgd -o -name *.rep -o -name *.xdc -o -name *.bi  \
            -o -name *.obj -o -name *.clw -o -name *.cpl \
            -o -name *.rsp -o -name *.ncb -o -name *.suo -o -name *.sbr \
            -o -name *.ilk -o -name *.pch -o -name *.tlb -o -name *.idb \
            -o -name *.bsc -o -name *.crf -o -name *.mdp -o -name *.aps \
            -o -name *.exp -o -name *.pgd -o -name *.pgc -o -name *.sdf \
            -o -name *.cache \
            -o -name *.opensdf"
    fi
    if ((optforce)); then       # remove harder
        conditional_expr+="\
            -o -name *.meta -o -name *.metagen -o -name *.manifest \
            -o -iname *.opt -o -iname *.res \
            -o -iname *.map -o -iname *.pdb"
    fi
    conditional_expr="-type f ( $conditional_expr )"
    doFind
    # .../ipch/ folders. An explicit if/else keeps a non-zero status of the
    # dry-run doFind from ever falling through to the removing variant.
    conditional_expr="-type d -path */ipch -prune"
    if ((optdry)); then
        doFind
    else
        doFind "$remove_dir_expr"
    fi
fi

if ((optpixeljunk)); then
    # -i: thumbnail databases of image programs; -f adds the backup files
    # written by exiftool and XnView.
    noise "$what thumbnails, caches and backup files from image programs"
    conditional_expr="-iname pspbrwse.jbf"
    conditional_expr+=" -o -iname ExifBrowser.Thumbnails"
    conditional_expr+=" -o -iname thumbs.db"
    if ((optforce)); then       # remove harder: exiftool, XnView backup files
        conditional_expr+=" -o -name *.*_original"
        conditional_expr+=" -o -name *.*_exiftool_tmp"
        conditional_expr+=" -o -name *.xnbak.*"
    fi
    conditional_expr="-type f ( $conditional_expr )"
    doFind
fi

###############################################################################
# (4) Remove empty files and directories.
#

if ((optemptyfiles != 0)); then
    # -e / -0: regular files of size zero, handled with the default action.
    conditional_expr='-type f -empty'
    noise "$what empty files"
    doFind
fi

if ((optemptydirs)); then
    # -E: empty directories. find_opts is set to -depth for this one search
    # only and cleared again afterwards. Directories are removed with the
    # rmdir expression rather than the default file action.
    noise "$what empty directories"
    find_opts="-depth"
    conditional_expr="-type d -empty"
    case $((optdry != 0)) in
        1) doFind ;;
        0) doFind "$remove_empty_dir_expr" ;;
    esac
    find_opts=
fi

# Final notice for verbose dry runs, then the regular successful exit.
if ((optdry)) && ((optverbose)); then
    noise "Dry run, nothing was deleted"
fi
ok

# Local Variables:
# coding: iso-8859-1-unix
# fill-column: 80
# End: