#!/usr/bin/perl

# V 0.1.0 030603 dlweb.pl
#
# Licensed under GPL
#
#  ¢  ¢
#  ¢ ¢
#  ¢     ¢¢  ¢  ¢  ¢¢   ¢¢     ¢¢   ¢¢   ¢¢
#  ¢ ¢  ¢  ¢ ¢  ¢ ¢  ¢ ¢  ¢   ¢  ¢ ¢  ¢ ¢  ¢
#  ¢  ¢ ¢     ¢¢  ¢  ¢  ¢¢¢ ¢  ¢¢  ¢     ¢¢¢
#                         ¢                ¢
#        martin         ¢¢     krung     ¢¢
#
# ++++++++++++++++++++++++++++++++++++++++++++++++
#

#
#  DESCRIPTION
#
#  if you write a paper in which you refer to
#  a website, it's important to keep the source.
#  so download the pages, because they disappear fast.
#
#  downloads a web page into a folder, packs the folder
#  into a gzipped tar archive and prints the links
#  needed to publish it on a website.
#  

#
#  HINT
#
#  dlweb.pl means DownLoadWEB
#

#
#  USAGE
#  
#  first argument is the URL you want to download.
#  	
#  second argument is the folder in which to download.
#
#  these two arguments are mandatory
#
#  third argument is optional:
#
#  it should be a number between 1 and 3,
#  it's the depth for a recursive download
#
#  the output is only correct if the script is run inside the folder set as docroot
#

#
#  EXAMPLE
#
#  downloads my website recursively with depth 2
#
#  perl dlweb.pl http://krungkuene.org/index.html krungkuene 2
#
#
#  the output looks like this:
#
#  web:  http://krungkuene.org/index.html
#  loc:  https://krungkuene.org/krung/03/krungkuene/index.html
#  dwl:  https://krungkuene.org/krung/03/krungkuene.tgz
#

#
#  REQUIRE
#  
#  wget, mkdir, tar
#

#
#  WARNING
#
#  should not damage anything
#  
#  still, use it at your own risk


use strict;
use warnings;
use Cwd qw(getcwd);

# User agent wget pretends to be, because some websites reject the
# default wget agent.  No shell quoting is needed in the value: wget is
# started without a shell, so the string is passed as a single argument.
my $agent = 'Mozilla/4.0 (compatible; MSIE 5.0; UNIX) Opera 6.1  [en]';

# the document root of your website and the URL it is served under
my $docroot = "/home/secure/public_html/";
my $website = "https://krungkuene.org/";

# Returns the name under which the page behind $url is expected to be
# stored locally: the last path segment of the URL, or "index.html" when
# the URL has no path or ends in a slash.  A "#fragment" is dropped
# because it is never part of a file name.
sub url_filename {
    my ($url) = @_;

    my $path = $url;
    $path =~ s{[#].*\z}{}s;                              # drop the fragment
    $path =~ s{\A(?:[a-z][a-z0-9+.-]*://)?[^/]*}{}i;     # drop scheme and host
    $path =~ s{.*/}{}s;                                  # keep the last segment

    return length $path ? $path : 'index.html';
}

# Maps the absolute local folder $path to its public URL by replacing a
# leading $docroot with $website.  Trailing slashes on $docroot and
# $website are ignored, $docroot is matched literally (no regex
# metacharacters) and only at the start of $path.  A path outside of
# $docroot is returned unchanged.
sub local_path_to_url {
    my ($path, $root, $site) = @_;

    s{/+\z}{} for $root, $site;

    return $site if $path eq $root;
    $path =~ s{\A\Q$root\E(?=/)}{$site};
    return $path;
}

# Runs an external command without a shell (list form of system), so
# arguments such as URLs containing "&" or "?" are passed through
# untouched.  Dies if the program cannot be started, warns if it ends
# with a non-zero status.  Returns true on success.
sub run_command {
    my @command = @_;

    system(@command);
    my $status = $?;

    die "cannot run $command[0]: $!\n" if $status == -1;

    if ($status & 127) {
        warn "$command[0] was killed by signal " . ($status & 127) . "\n";
    }
    elsif ($status != 0) {
        warn "$command[0] exited with status " . ($status >> 8) . "\n";
    }

    return $status == 0;
}

# Entry point.  Arguments: URL, target folder, optional recursion depth.
# Downloads the URL into the folder, packs the folder into FOLDER.tgz
# next to it and prints the links.  Returns the exit status of the
# script: 1 when arguments are missing, 0 otherwise.
sub main {
    my @args = @_;

    # counts the arguments, if fewer than 2 the usage hint is printed
    if (@args < 2) {
        print "give first argument URL and second argument folder to download \n";
        return 1;
    }

    my ($url, $dir, $recursive) = @args;

    # a trailing slash would turn the archive name into "folder/.tgz"
    $dir =~ s{(?<=[^/])/+\z}{};

    # remember where we started; going back via ".." is wrong for
    # folder arguments like "a/b"
    my $start = getcwd();
    die "cannot determine the current folder: $!\n" if !defined $start;

    # creates the folder (an existing one is reused) and changes into it
    if (!-d $dir) {
        mkdir $dir or die "cannot create folder '$dir': $!\n";
    }
    chdir $dir or die "cannot change into folder '$dir': $!\n";

    # prints the absolute path of the download folder
    my $target = getcwd();
    print $target, "\n" if defined $target;

    # completes the wget options; recursion only for a positive number
    my @recursion;
    if (defined $recursive && $recursive ne '') {
        if ($recursive =~ /\A[0-9]+\z/) {
            @recursion = ('-r', '-l', $recursive) if $recursive > 0;
        }
        else {
            warn "ignoring non-numeric recursion depth '$recursive'\n";
        }
    }

    # starts the download; a non-zero wget status is only reported,
    # because wget also returns one for partially failed downloads
    my @flags = qw(-p -k -nd -nv -H -F -U);
    print join(' ', 'wget', @flags, qq{"$agent"}, @recursion, $url), "\n";
    run_command('wget', @flags, $agent, @recursion, $url);

    # changes back into the folder we started in
    chdir $start or die "cannot change back into '$start': $!\n";

    # creates the .tgz of the downloaded stuff
    my $tar = $dir . ".tgz";
    run_command('tar', '-zcf', $tar, $dir);

    # turns the local path into a URL and extracts the downloaded file name
    my $base     = local_path_to_url($start, $docroot, $website);
    my $filename = url_filename($url);

    # creates the links and writes them out
    print "\n\n";
    print "web:  " . $url . "\n";
    print "loc:  " . $base . "/" . $dir . "/" . $filename . "\n";
    print "dwl:  " . $base . "/" . $tar . "\n";
    print "\n\n";

    return 0;
}

# run only when executed as a script, not when loaded by another file
exit main(@ARGV) unless caller;

1;


