#!/usr/bin/perl -sw
#$version = "1.005";
#
# Created by Steve Voisey (srv) 26 may 2011.
# email: [steve.voisey@ericsson.com]
#
# name:  tidyOldFiles.pl
#
# Author  Date      Version Description
# ======  ========= ======= ===========
# srv     26/may/11 V1.000  Created.
# srv     17/oct/11 V1.005  Swap from line count to size at MAXLINES, save file cache.
#
# usage: ./tidyOldFiles.pl -type=trim   -file=trim.list.EXAMPLE   -update=no -exceed=no
# usage: ./tidyOldFiles.pl -type=delete -file=delete.list.EXAMPLE -update=no
#

use File::Basename;

# Files larger than MAXBYTES are not line-counted by trim(); MAXLINES is the
# line count assumed for them (81 characters per line). int() keeps MAXLINES
# a whole number should MAXBYTES ever be changed to a non-multiple of 81.
my $MAXBYTES = 5265000;
my $MAXLINES = int($MAXBYTES / 81);
# Prefix shown in the "max" column when MAXLINES was applied instead of the
# configured value.
my $MARK     = "*";

# Running total of the sizes of all files examined by trim().
my $totalBytes = 0;
my $divide     = "=" x 80 . "\n";

my $date = `date '+%d %h %y'`;
chomp($date);

my $time = `date '+%H:%M'`;
chomp($time);

print $divide;
print "\t\t\t\t$date $time\n\n";

# $help, $type, $file, $update, $exceed and $log are package globals filled
# in by perl's -s switch (see the shebang line). They must NOT be declared
# with 'my' here, or the command-line values would be hidden.
if (defined($help)) { help(); exit; }

$type = "trim" unless defined($type);
unless ( $type eq "trim" || $type eq "delete" ) {
    # No system call has failed at this point, so $! is not reported.
    die "invalid type $type";
}

unless (defined($file)) {
    $file = ( $type eq "trim" )
          ? "/opt/tandbergtv/cms/scripts/conf/trim.list"
          : "/opt/tandbergtv/cms/scripts/conf/delete.list";
}
$update = "no" unless defined($update);
$exceed = "no" unless defined($exceed);
$log    = 0    unless defined($log);

# Three-argument open: the list file name is always treated as a plain file
# name, even if it starts with '>' or ends with '|'.
open(my $listHandle, '<', $file) or die "cannot open $file $!";
my @listLines = <$listHandle>;
close($listHandle);

print "type [$type] update [$update] exceed [$exceed] log [$log]\n";
print "file [$file]\n";
print "$divide\n";
my $divideFile = "-" x 36;

if ( $type eq "trim" ) {
    # Column headings; trim() prints its rows with the same format string.
    printf "%7s %10s %8s %8s %6s %-36s\n","EXCEEDS", "current", "max", "trim", "size", "file";
    printf "%7s %10s %8s %8s %6s %-36s\n","-------", "-------", "---", "----", "----", $divideFile;
}

# Each list line is handed to the handler for the selected mode; comment
# and blank lines are filtered out by the handlers themselves.
foreach my $line (@listLines) {
    chomp($line);

    if    ( $type eq "trim" )   { trim($update, $line); }
    elsif ( $type eq "delete" ) { deleteFile($update, $line); }
}

if ( $type eq "trim" ) {
    print $divide;
    size($totalBytes);
    print "\nNOTE: $MARK maximum bytes set to [$MAXBYTES] lines set to [$MAXLINES]\n"; 
    print "\n";
}

if ( $type eq "delete" ) {
    print $divide;
    print "\n";
}

exit;

##################################################################################
##################################################################################

# trim - process one entry of the trim list.
#
# Parameters:
#   $update - when it matches /yes/i, a file over its limit is truncated to
#             its last $trim lines; otherwise files are only reported.
#   $line   - one list line: 'path|max' or 'path|max|trim'. Lines starting
#             with '#' and blank lines are ignored. 'path' may be a plain
#             file, a directory (every entry in it is processed) or a
#             pattern containing '*'.
#
# Returns nothing. Prints one status row per file (all files, or only the
# ones over their limit when the global $exceed matches /yes/i) and adds
# each file's size in bytes to the file-level $totalBytes.
sub trim {
    my ($update, $line) = @_;

    my ($inputFile, $max, $trim);
    my $maxChar = "";

    # The optional [\s]* / [|\s]* tails allow for messy trailing spaces.
    my $patternWithTrim = qr/^([^\|]+)\|([\d]+)\|([\d]+)[\s]*$/;
    my $patternNoTrim   = qr/^([^\|]+)\|([\d]+)[\|\s]*$/;

    # Turns a value into exactly one shell word, so file names containing
    # spaces, quotes or shell metacharacters cannot break or alter the
    # commands below.
    my $shellQuote = sub {
        my ($word) = @_;
        $word =~ s/'/'\\''/g;
        return "'$word'";
    };

    chomp($line);

    return if $line =~ /^#/;
    return if $line =~ /^\s*$/;

    if ( $line =~ $patternWithTrim ) {
        ($inputFile, $max, $trim) = ($1, $2, $3);
    } elsif ( $line =~ $patternNoTrim ) {
        ($inputFile, $max) = ($1, $2);
    } else {
        print "invalid line: $line\n";
        return;
    }

    # Ignore max if it is greater than MAXLINES and use MAXLINES instead;
    # the displayed value is then prefixed with $MARK.
    if ( $max >= $MAXLINES ) {
        $max     = $MAXLINES;
        $maxChar = $MARK . $max;
    } else {
        $maxChar = $max;
    }

    # Without an explicit trim value, keep 75% of max.
    if ( !defined($trim) || $trim eq "" ) { $trim = $max * 0.75; }

    # trim may well be a fraction if defaulted, and catch max just in case:
    # tail must be given an integer.
    $max  = int($max);
    $trim = int($trim);

    # Wildcards and directories are expanded and every resulting path is
    # fed back through trim() with the limits resolved above.
    if ( $inputFile =~ /\*/ ) {
        foreach my $match (glob $inputFile) {
            trim($update, "$match|$max|$trim");
        }
        return;
    }

    if ( -d $inputFile ) {
        foreach my $entry (glob "$inputFile/*") {
            trim($update, "$entry|$max|$trim");
        }
        return;
    }

    unless ( -f $inputFile ) { print "WARNING file does not exist: $inputFile\n"; return; }

    my $quotedFile = $shellQuote->($inputFile);

    my $size = `du -h -- $quotedFile | cut -f1`;
    $size = "" unless defined($size);
    chomp($size);

    my $bytes = `du -b -- $quotedFile | cut -f1`;
    $bytes = "" unless defined($bytes);
    chomp($bytes);
    # A failed du must not poison the running total with a non-number.
    $bytes = 0 unless $bytes =~ /^\d+$/;
    $totalBytes = $totalBytes + $bytes;

    # Now, from painful experience, Linux caches all file reads
    # so if we 'wc' a file, that gets added to cache which is not really what we want.
    # If you 'wc' a 25GB file that is all your free memory gone!
    # So we set a max file size in bytes, assume 81 char a line, and use that as a max
    # line count to prevent processing excessively large log files.
    #
    # Note: 5265000 bytes / 5.1 MB == 65000 line file with 81 characters per line.
    #       Should be big enough for a log file?
    #
    # NOTE(review): an oversized file is reported as exactly MAXLINES lines,
    # so when max was capped to MAXLINES above it is never flagged as
    # EXCEEDS and never trimmed - confirm whether that is intended.
    my $lineCount;
    if ( $bytes > $MAXBYTES ) {
        $lineCount = $MAXLINES;
        $maxChar   = $MARK . $max;
    } else {
        my $wcOutput = `wc -l -- $quotedFile`;
        $wcOutput = "" unless defined($wcOutput);
        # wc prints "<count> <file>"; only the count is wanted.
        ($lineCount) = split(" ", $wcOutput);
        $lineCount = 0 unless defined($lineCount) && $lineCount =~ /^\d+$/;
    }

    my $exceeds = ( $lineCount > $max );
    my $flag    = $exceeds ? "EXCEEDS" : "       ";

    # If the exceed flag is set, only print out files that exceed max.
    if ( $exceeds || $exceed !~ /yes/i ) {
        printf "%7s %10s %8s %8s %6s %-36s\n", $flag, $lineCount, $maxChar, $trim, $size, $inputFile;
    }

    if ( $exceeds && $update =~ /yes/i ) {
        my $quotedTmp = $shellQuote->("$inputFile.TMP");

        # '&&' rather than ';': the original file is only replaced when tail
        # succeeded, so a failed tail (e.g. disk full) cannot wipe the log.
        my $command = "tail -n $trim -- $quotedFile > $quotedTmp && mv -- $quotedTmp $quotedFile";
        print "\nexecuting: \n $command\n";

        if ( system($command) != 0 ) {
            print "WARNING trim failed, file left unchanged: $inputFile\n";
            unlink("$inputFile.TMP");
        }
    }
    return;
}

##################################################################################
##################################################################################

# deleteFile - process one entry of the delete list.
#
# Parameters:
#   $update - when it matches /yes/i, the files found to be too old are
#             removed; otherwise they are only listed.
#   $line   - one list line: 'path|maxDays' or 'path|maxDays|maxNumber'.
#             Lines starting with '#' and blank lines are ignored. When
#             maxNumber is missing it defaults to maxDays.
#
# Files matching 'path' that are older than maxDays days are listed (and
# removed when updating), but only when at least maxNumber matching files
# newer than that exist. Directories and a bare '*' entry are skipped.
#
# Returns nothing. Uses the global $log (print the counting commands) and
# the file-level $divide separator.
sub deleteFile {
    my ($update, $line) = @_;

    my ($inputFile, $maxDays, $maxNumber);

    my $patternWithNum = qr/^([^\|]+)\|([\d]+)\|([\d]+)[\s]*$/;
    my $patternNoNum   = qr/^([^\|]+)\|([\d]+)[\s]*$/;

    # Turns a value into exactly one shell word, so paths and names with
    # spaces, quotes or shell metacharacters cannot break or alter the find
    # commands below.
    my $shellQuote = sub {
        my ($word) = @_;
        $word =~ s/'/'\\''/g;
        return "'$word'";
    };

    chomp($line);

    return if $line =~ /^#/;
    return if $line =~ /^\s*$/;

    if ( $line =~ $patternWithNum ) {
        ($inputFile, $maxDays, $maxNumber) = ($1, $2, $3);
    } elsif ( $line =~ $patternNoNum ) {
        ($inputFile, $maxDays) = ($1, $2);
    } else {
        print "invalid line: $line\n";
        return;
    }

    if ( !defined($maxNumber) || $maxNumber eq "" ) { $maxNumber = $maxDays; }

    my @anyFiles = glob $inputFile;

    if ( @anyFiles == 0 ) { print "\nWARNING file not found: $inputFile\n\n"; return; }

    # Can not get find | wc -l to work for directories/* so disable for now.
    if ( -d $inputFile ) { print "\nWARNING file is a directory, skipping: $inputFile\n\n"; return; }

    # fileparse always returns a directory part ('./' when none was given),
    # so a bare wildcard has to be recognised from the entry itself.
    if ( $inputFile eq "*" ) { print "\nWARNING file is ONLY a wildcard, not supported, skipping: $inputFile\n\n"; return; }

    my ($name, $path) = fileparse($inputFile);

    # In the wonderful world of unix:
    #
    #    "find . -mtime +1" - find files modified more than 48 hours ago
    #
    # So for maxDays to make sense for the user, need to subtract one.
    my $findMaxDays = $maxDays;
    $findMaxDays-- unless $findMaxDays == 0;

    my $minusDays = $findMaxDays + 1;

    # $name must reach find as a single quoted word, or the shell would
    # expand a wildcard before find sees it.
    # NOTE(review): find also descends into subdirectories of the path, so
    # matches are not limited to the directory itself - confirm intended.
    my $findBase = "find  " . $shellQuote->($path) . " -name " . $shellQuote->($name);

    my $command = "$findBase -mtime -$minusDays -exec ls -alh {} \\; | wc -l";
    my $newFileCount = `$command`;
    $newFileCount = 0 unless defined($newFileCount);
    chomp($newFileCount);
    if ( $log ) { print "cmd newFileCount: $command\n"; }

    $command = "$findBase -mtime +$findMaxDays -exec ls -alh {} \\; | wc -l";
    my $oldFileCount = `$command`;
    $oldFileCount = 0 unless defined($oldFileCount);
    chomp($oldFileCount);
    if ( $log ) { print "cmd oldFileCount: $command\n"; }

    print "\nfiles: [new|old] [$newFileCount|$oldFileCount] [num|days] [$maxNumber|$maxDays] $path$name\n";

    # Old files are only touched while enough newer files remain.
    if (( $newFileCount >= $maxNumber ) && ( $oldFileCount > 0 )) {

        print "\nfiles older than [$maxDays] days:\n";
        print $divide;
        my $commandList   = "$findBase -mtime +$findMaxDays -exec ls -alh {} \\;";
        my $commandDelete = "$findBase -mtime +$findMaxDays -exec rm -vf {} \\;";
        print "$commandList\n";
        print `$commandList`;

        if ( $update =~ /yes/i ) {
            print "\nfiles older than [$maxDays] days will be removed:\n";
            print $divide;
            print "$commandDelete\n";
            print `$commandDelete`;
        }
    }
    return;
}

##################################################################################
##################################################################################

# size - print a byte total using the largest unit whose whole-number value
# is greater than 1 (GB, then MB, then KB), falling back to plain bytes.
#
# Parameters:
#   $bytes - the total in bytes.
#
# Returns nothing; the result is printed.
sub size {
    my ($bytes) = @_;
    my $step = 1024;

    # Each unit is the whole-number part of the previous one divided by 1024.
    my $asKB = int($bytes / $step);
    my $asMB = int($asKB / $step);
    my $asGB = int($asMB / $step);

    # Only the plain-bytes form ends with a blank line.
    my $report = ( $asGB > 1 ) ? "\ntotal size: $asGB GB\n"
               : ( $asMB > 1 ) ? "\ntotal size: $asMB MB\n"
               : ( $asKB > 1 ) ? "\ntotal size: $asKB KB\n"
               :                 "\ntotal size: $bytes Bytes\n\n";

    print $report;
    return;
}


##################################################################################
##################################################################################

# help - print the usage text for both modes (trim and delete).
#
# Takes no parameters and returns nothing; the text is printed.
sub help {

    # Show the name the script was actually started as, so the usage lines
    # stay correct whatever the file is called.
    my $script = basename($0);

    my $helpText = <<ENDHELPPAGE1;



  usage: ./$script -type=trim   -file=trim.list.EXAMPLE   -update=no -exceed=no
  usage: ./$script -type=delete -file=delete.list.EXAMPLE -update=no

        type   [trim|delete]
                default 'trim'
                trim   - Truncate files that exceed a maximum line count.
                delete - Remove files older than a number of days.

        update [yes|no]
                default 'no'
                no  - Only display status of each file.
                yes - trim:   Truncate the file back to the value [\$trim]
                      delete: Remove the files listed as too old.

        exceed [yes|no]   (trim only)
                default 'no'
                no  - Display each file.
                yes - Only display files where length exceeds [\$max]

        log    [0|1]      (delete only)
                default '0'
                1   - Also display the commands used to count the files.

        file    input list file
                default '/opt/tandbergtv/cms/scripts/conf/trim.list'
                     or '/opt/tandbergtv/cms/scripts/conf/delete.list'

        help    display this text.

  Input file format supports:

   comments [^#] and spaces [^\\s+\$] - lines are ignored.

   file path:   /home/file.log - the file 'file.log' will be processed.

   directories: /home/log      - all files in '/home/log' will be processed
                                 (trim only, skipped by delete)
                                 <** SEE NOTE BELOW **>

   wild cards:  /home/file*txt - all matching files will be processed.
                                 <** SEE NOTE BELOW **>

  Trim list entries:

   /home/file.log|1000|500     - If 'file.log' exceeds 1000 lines
                                 it will be truncated to '500' lines.

   /home/file.log|1000         - If 'file.log' exceeds 1000 lines
                                 by default it will be truncated to '75%' of
                                 the maximum ( 1000 x 0.75 ) 750 lines.

  Delete list entries:

   /home/file*.log|7|3         - Files matching 'file*.log' that are older
                                 than '7' days are removed, but only while
                                 at least '3' newer matching files exist.

   /home/file*.log|7           - As above, the number of newer files
                                 required defaults to the number of
                                 days ( 7 ).

################################################################################
#                                                                              #
#                       ******* NOTE WARNING ********                          #
#                                                                              #
#    ****** !!! ESPECIALLY WHEN USING DIRECTORIES OR WILDCARDS !!! *******     #
#                                                                              #
# Only include log files, files that are constantly updated and can be         #
# truncated or deleted without impacting the running system.                   #
#                                                                              #
# For example including source files, property files or binaries WILL          #
# corrupt/destroy the files if they get truncated.                             #
#                                                                              #
################################################################################

ENDHELPPAGE1
    print $helpText;
    return;
}

##################################################################################
# end                                                                            #
##################################################################################

