package Cpanel::CPAN::Digest::MD5::File;

use strict;
use warnings;
use Carp;
use Digest::MD5;
eval { require Encode; };
use LWP::UserAgent;

require Exporter;
our @ISA       = qw(Exporter Digest::MD5);
our @EXPORT_OK = qw(dir_md5  dir_md5_hex  dir_md5_base64
  file_md5 file_md5_hex file_md5_base64
  url_md5  url_md5_hex  url_md5_base64);

our $BINMODE  = 1;
our $UTF8     = 0;
our $NOFATALS = 0;

sub import {
    my $me = shift;
    my %imp;

    @imp{@_} = ();
    for (@EXPORT_OK) {
        delete $imp{$_} if exists( $imp{$_} );
    }

    $BINMODE  = 0 if exists $imp{-nobin};
    $UTF8     = 1 if exists $imp{-utf8};
    $NOFATALS = 1 if exists $imp{-nofatals};

    for ( keys %imp ) {
        s/^-//;
        $imp{$_} = '' unless $_ =~ m/^(no)?(bin|utf8|fatals)$/;
        push @EXPORT_OK, $_ unless $_ =~ m/^(no)?(bin|utf8|fatals)$/;
        delete $imp{"-$_"} if exists $imp{"-$_"};
    }

    $me->export_to_level( 1, $me, grep( !/^-/, @_ ) );
    Digest::MD5->import( keys %imp );
}

our $VERSION = '0.08';

my $getfh = sub {
    my $file = shift;

    croak "$file: Does not exist" if !-e $file && !$NOFATALS;
    croak "$file: Is a directory" if -d $file  && !$NOFATALS;

    if ( -e $file && !-d $file ) {
        open my $fh, $file or return;
        binmode $fh if $BINMODE;
        return $fh;
    }
    else { return undef; }
};

my $getur = sub {
    my $res = LWP::UserAgent->new->get( shift() );
    return $res->is_success ? $res->content : undef;
};

sub Digest::MD5::adddir {
    my $md5  = shift;
    my $base = shift;
    for my $key ( sort keys %{ _dir( $base, undef, undef, 3 ) } ) {
        next if !$key;
        my $file = File::Spec->catfile( $base, $key );
        $md5->addpath($file) or carp "addpath $file failed: $!" if !-d $file;
    }
    return 1;
}

sub _dir {
    my ( $dir, $hr, $base, $type, $cc ) = @_;
    require File::Spec;    # only load it if its needed

    $cc   = {}   if ref $cc ne 'HASH';
    $hr   = {}   if ref $hr ne 'HASH';
    $base = $dir if !defined $base;
    $type = 0    if !defined $type;

    my $_md5func = \&file_md5;
    $_md5func = \&file_md5_hex    if $type eq '1';
    $_md5func = \&file_md5_base64 if $type eq '2';

    opendir( DIR, $dir ) or return;
    my @dircont = sort grep { $_ ne '.' && $_ ne '..' } readdir(DIR);
    closedir DIR;

    for my $file (@dircont) {
        my $_dirver = File::Spec->catdir( $dir, $file );
        my $full =
          !-l $_dirver && -d _
          ? $_dirver
          : File::Spec->catfile( $dir, $file );

        if ( exists $hr->{$full} ) {
            carp "$full seen already, you may have circular links";
            $cc->{$full}++;
            croak "$full is in a circular link, bailing out."
              if $cc->{$full} > 4;
        }

        my $short = File::Spec->abs2rel( $full, $base );

        if ( -l $full || -f $full ) {
            $hr->{$short} = '';
            $hr->{$short} = $_md5func->($full) or return if $type ne '3';
        }
        elsif ( -d $full ) {
            $hr->{$short} = '';
            _dir( $full, $hr, $base, $type, $cc ) or return;
        }
        else {
            carp "$full is not a symlink, file, or directory, ignoring\n";
        }
    }
    return $hr;
}

sub dir_md5 {
    push @_, undef if @_ < 3;
    push @_, undef if @_ < 3;
    _dir( @_, 0 );
}

sub dir_md5_hex {
    push @_, undef if @_ < 3;
    push @_, undef if @_ < 3;
    _dir( @_, 1 );
}

sub dir_md5_base64 {
    push @_, undef if @_ < 3;
    push @_, undef if @_ < 3;
    _dir( @_, 2 );
}

sub file_md5 {
    my ( $file, $bn, $ut ) = @_;
    local $BINMODE = $bn if defined $bn;
    local $UTF8    = $ut if defined $ut;
    my $md5 = Digest::MD5->new();

    if ( -l $file ) {
        my $target = readlink($file);
        $md5->add( $UTF8 ? Encode::encode_utf8($target) : $target );
        return $md5->digest;
    }
    elsif ( -z _ ) {
        $md5->add( $UTF8 ? Encode::encode_utf8($file) : $file );
        return $md5->digest;
    }

    my $fh = $getfh->($file) or return;

    my $buf;
    while ( my $l = read( $fh, $buf, 1024 ) ) {
        $md5->add( $UTF8 ? Encode::encode_utf8($buf) : $buf );
    }
    return $md5->digest;
}

sub file_md5_hex {
    my ( $file, $bn, $ut ) = @_;
    local $BINMODE = $bn if defined $bn;
    local $UTF8    = $ut if defined $ut;

    my $md5 = Digest::MD5->new();

    if ( -l $file ) {
        my $target = readlink($file);
        $md5->add( $UTF8 ? Encode::encode_utf8($target) : $target );
        return $md5->hexdigest;
    }
    elsif ( -z _ ) {
        $md5->add( $UTF8 ? Encode::encode_utf8($file) : $file );
        return $md5->hexdigest;
    }

    my $fh = $getfh->($file) or return;

    my $buf;
    while ( my $l = read( $fh, $buf, 1024 ) ) {
        $md5->add( $UTF8 ? Encode::encode_utf8($buf) : $buf );
    }
    return $md5->hexdigest;
}

sub file_md5_base64 {
    my ( $file, $bn, $ut ) = @_;
    local $BINMODE = $bn if defined $bn;
    local $UTF8    = $ut if defined $ut;

    my $md5 = Digest::MD5->new();

    if ( -l $file ) {
        my $target = readlink($file);
        $md5->add( $UTF8 ? Encode::encode_utf8($target) : $target );
        return $md5->b64digest;
    }
    elsif ( -z _ ) {
        $md5->add( $UTF8 ? Encode::encode_utf8($file) : $file );
        return $md5->b64digest;
    }

    my $fh = $getfh->($file) or return;

    my $buf;
    while ( my $l = read( $fh, $buf, 1024 ) ) {
        $md5->add( $UTF8 ? Encode::encode_utf8($buf) : $buf );
    }
    return $md5->b64digest;
}

sub url_md5 {
    my $cn = $getur->( shift() ) or return;
    my ($ut) = shift;
    local $UTF8 = $ut if defined $ut;
    return Digest::MD5::md5($cn) if !$UTF8;
    return Digest::MD5::md5( Encode::encode_utf8($cn) );
}

sub url_md5_hex {
    my $cn = $getur->( shift() ) or return;
    my ($ut) = shift;
    local $UTF8 = $ut if defined $ut;
    return Digest::MD5::md5_hex($cn) if !$UTF8;
    return Digest::MD5::md5_hex( Encode::encode_utf8($cn) );
}

sub url_md5_base64 {
    my $cn = $getur->( shift() ) or return;
    my ($ut) = shift;
    local $UTF8 = $ut if defined $ut;
    return Digest::MD5::md5_base64($cn) if !$UTF8;
    return Digest::MD5::md5_base64( Encode::encode_utf8($cn) );
}

sub Digest::MD5::addpath {
    my $md5 = shift;
    my ( $fl, $bn, $ut ) = @_;
    local $BINMODE = $bn if defined $bn;
    local $UTF8    = $ut if defined $ut;
    if ( ref $fl eq 'ARRAY' ) {
        for my $pth ( @{$fl} ) {
            if ( -l $pth ) {
                my $target = readlink($pth);
                $md5->add( $UTF8 ? Encode::encode_utf8($target) : $target );
            }
            elsif ( -z _ ) {
                $md5->add( $UTF8 ? Encode::encode_utf8($pth) : $pth );
            }
            else {

                $md5->addpath( $pth, $bn, $ut ) or return;
            }
        }
    }
    else {
        if ( -l $fl ) {
            my $target = readlink($fl);
            $md5->add( $UTF8 ? Encode::encode_utf8($target) : $target );
        }
        elsif ( -z _ ) {
            $md5->add( $UTF8 ? Encode::encode_utf8($fl) : $fl );
        }
        else {
            my $fh = $getfh->($fl) or return;
            my $buf;
            while ( my $l = read( $fh, $buf, 1024 ) ) {
                !$UTF8 ? $md5->add($buf) : $md5->add( Encode::encode_utf8($buf) );
            }
        }
    }
    return 1;
}

sub Digest::MD5::addurl {
    my $md5 = shift;
    my $cn  = $getur->( shift() ) or return;
    my $ut  = shift;
    local $UTF8 = $ut if defined $ut;
    !$UTF8 ? $md5->add($cn) : $md5->add( Encode::encode_utf8($cn) );
}

1;

__END__

=head1 NAME

Cpanel::CPAN::Digest::MD5::File - Perl extension for getting MD5 sums for files and urls. 

=head1 SYNOPSIS

    use Cpanel::CPAN::Digest::MD5::File qw(dir_md5_hex file_md5_hex url_md5_hex);

    my $md5 = Digest::MD5->new;
    $md5->addpath('/path/to/file');
    my $digest = $md5->hexdigest;

    my $digest = file_md5($file);
    my $digest = file_md5_hex($file);
    my $digest = file_md5_base64($file);

    my $md5 = Digest::MD5->new;
    $md5->addurl('http://www.tmbg.com/tour.html');
    my $digest = $md5->hexdigest;

    my $digest = url_md5($url);
    my $digest = url_md5_hex($url);
    my $digest = url_md5_base64($url);
  
    my $md5 = Digest::MD5->new;
    $md5->adddir('/directory');
    my $digest = $md5->hexdigest;

    my $dir_hashref = dir_md5($dir);    
    my $dir_hashref = dir_md5_hex($dir);    
    my $dir_hashref = dir_md5_base64($dir);

=head1 DESCRIPTION

  Get MD5 sums for files of a given path or content of a given url.

=head1 EXPORT

None by default.
You can export any file_* dir_*, or url_* function and anything L<Digest::MD5> can export.

   use Cpanel::CPAN::Digest::MD5::File qw(md5 md5_hex md5_base64); # 3 Digest::MD5 functions
   print md5_hex('abc123'), "\n";
   print md5_base64('abc123'), "\n";

=head1 OBJECT METHODS

=head2 addpath()

    my $md5 = Digest::MD5->new;
    $md5->addpath('/path/to/file.txt') 
        or die "file.txt is not where you said: $!";

or you can add multiple files by specifying an array ref of files:

    $md5->addpath(\@files);

=head2 adddir()
 
addpath()s each file in a directory recursively. Follows the same rules as the dir_* functions.

    my $md5 = Digest::MD5->new;
    $md5->adddir('/home/tmbg/') 
        or die "See warning above to see why I bailed: $!";

=head2 addurl()

    my $md5 = Digest::MD5->new;
    $md5->addurl('http://www.tmbg.com/tour.html')
        or die "They Must Be not on tour";

=head1 file_* functions

Get the digest in variouse formats of $file.
If file does not exist or is a directory it croaks (See NOFATALS for more info)

    my $digest = file_md5($file) or warn "$file failed: $!";
    my $digest = file_md5_hex($file) or warn "$file failed: $!";
    my $digest = file_md5_base64($file) or warn "$file failed: $!";

=head1 dir_* functions

Returns a hashref whose keys are files relative to the given path and the values are the MD5 sum of the file or and empty string if a directory.
It recurses through the entire depth of the directory.
Symlinks to files are just addpath()d and symlinks to directories are followed.

    my $dir_hashref = dir_md5($dir) or warn "$dir failed: $!";
    my $dir_hashref = dir_md5_hex($dir) or warn "$dir failed: $!";
    my $dir_hashref = dir_md5_base64($dir) or warn "$dir failed: $!";

=head1 url_* functions

Get the digest in various formats of the content at $url (Including, if $url points to directory, the directory listing content).
Returns undef if url fails (IE if L<LWP::UserAgent>'s $res->is_success is false)

    my $digest = url_md5($url) or warn "$url failed"; 
    my $digest = url_md5_hex($url) or warn "$url failed";
    my $digest = url_md5_base64($url) or warn "$url failed";

=head1 SPECIAL SETTINGS

=head2 BINMODE

By default files are opened in binmode. If you do not want to do this you can unset it a variety of ways:

    use Cpanel::CPAN::Digest::MD5::File qw(-nobin);

or

    $Cpanel::CPAN::Digest::MD5::File::BINMODE = 0;

or at the function/method level by specifying its value as the second argument:

    $md5->addpath($file,0);

    my $digest = file_md5_hex($file,0);

=head2 UTF8

In some cases you may want to have your data utf8 encoded, you can do this the following ways:

    use Cpanel::CPAN::Digest::MD5::File qw(-utf8);

or

    $Cpanel::CPAN::Digest::MD5::File::UTF8 = 1;

or at the function/method level by specifying its value as the third argument for files and second for urls:

    $md5->addpath($file,$binmode,1);

    my $digest = file_md5_hex($file,$binmode,1);

    $md5->addurl($url,1);

    url_md5_hex($url,1);

It use's L<Encode>'s encode_utf8() function to do the encoding. So if you do not have Encode (pre 5.7.3) this won't work :)

=head2 NOFATALS

Instead of croaking it will return undef if you set NOFATALS to true.

You can do this two ways:

    $Cpanel::CPAN::Digest::MD5::File::NOFATALS = 1;

or the -nofatals flag:

    use Cpanel::CPAN::Digest::MD5::File qw(-nofatals);

    my $digest = file_md5_hex($file) or die "$file failed";

$! is not set so its not really helpful if you die(). 

=head1 SEE ALSO

L<Digest::MD5>, L<Encode>, L<LWP::UserAgent>

=head1 AUTHOR

Daniel Muey, L<http://drmuey.com/cpan_contact.pl>

=head1 COPYRIGHT AND LICENSE

Copyright 2005 by Daniel Muey

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself. 

=cut
