#!/usr/bin/perl

# cpanel - cPanel/CMS/E107/mysql_normalized_dump.pl Copyright(c) 2013 cPanel, Inc.
#                                                            All rights Reserved.
# copyright@cpanel.net                                         http://cpanel.net
# This code is subject to the cPanel license. Unauthorized copying is prohibited

use strict;
use warnings;

use Getopt::Long;

my $IPv4_re = qr/(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)/;

my ( $username, $password, $database );
my $table_prefix = 'e107_';
my $output       = 'mysqldump.sql';
GetOptions(
    'help' => \&printHelp,

    'username=s'     => \$username,
    'password=s'     => \$password,
    'db|database=s'  => \$database,
    'table-prefix=s' => \$table_prefix,
    'output=s'       => \$output,
);

# Ensure required data is provided.
die "Username not specified" unless $username;

# Provide default values for unspecified options.
$database ||= $username . "_e1071";

my $unix_time;

# Open (create or truncate) output file and print header line.
open FILE, '+>', $output or die "Cannot open file '$output': $!";
print FILE "USE [% e107 %];\n\n";

if ( !$password ) {
    $password = '';
    print "Enter MySQL password for user '$username'\n";
}

# Dump the MySQL database (using mysqldump) and normalize any known values
# to make diffing easier.
my @cmd = ( 'mysqldump', '-h', 'localhost', '-u', $username, "-p$password", $database );
open PIPE, '-|', @cmd or die "Cannot dump MySQL database contents: $!";
while (<PIPE>) {

    # Normalize the values and then print them to the file

    # Clean up header comments to minimize the diff noise.
    s/(?<=^-- Host: localhost    Database: )\Q$database\E/[% e107 %]/;

    # Change all table names to use [% table_prefix %].  Some column names may also
    # contain the table_prefix.  Since column names are usually preceeded by a non-word,
    # like a comma or empty spaces, and table names are usually preceeded with a word -
    # or more precisely, a command - we do a zero-width lookback to mimimize errors.
    s/(?<=\w `)\Q$table_prefix\E(?=\w)/[% table_prefix %]_/;

    # Remove IP addressed by setting them to the loopback address.  This makes it
    # clear that the user did the work locally - that is, via the cPanel interface.
    # Note: IPv6 is too complicated - check out Regexp::IPv6 if the need arises.
    s/(?<=')$IPv4_re(?=')/127.0.0.1/g;

    # Normalize the install path.
    s/(?<=\\'siteurl\\' => \\'\/).*?(?=\/\\',)/[% installpath %]/g;

    # Normalize the admin's email address.
    s/(?<=')admin\@yoursite\.com(?=\\?')/[% email %]/g;

    # Normalize the password field; limited to the _user table for safety
    if (m/^INSERT INTO `\[% table_prefix %\]_user`/) {
        s/(?<=')[0-9a-f]{32}(?=')/[% password_md5_hex %]/i;
    }

    # Try to capture the timestamp used throughout the file.  Any instances close to
    # (within 5 seconds of) this unix time will be replaced.
    $unix_time = $1 if m/\\'install_date\\' => (\d{10,}),/;
    s/\b(\d{10,})\b/abs($1 - $unix_time) < 5 ? "[% unix_time %]" : $1/eg if $unix_time;

    # Remove minor stylistic differences to minimize the diff noise.
    s/(?<= )DEFAULT(?= (?:'.*'|\d+|NULL),?$)/default/i;
    s/(?<= )AUTO_INCREMENT(?=,?$)/auto_increment/i;
    s/PRIMARY KEY *(?=\()/PRIMARY KEY  /;

    # We don't care about any previously online visitors; remove them entirely.
    next if m/^INSERT INTO `\[% table_prefix %\]_online` VALUES/;

    print FILE $_;
}

close FILE or warn "Cannot close file '$output': $!";

exit 0;

sub printHelp {
    print <<EOH;
Usage: $0 -u <username> [OPTIONS]

Options:
   -u <username>  MySQL username (required).
   -p <password>  MySQL password.  If not provided, user will be prompted.
   -d <database>  MySQL database to dump (defaults to '<username>_e1071').
   -t <prefix>    Table prefix used (defaults to 'e107_').
   -o <file>      Output file (defaults to 'mysqldump.sql').
EOH
}
