[SCM] Video extraction utility for YouTube, Google Video and other video sites (Debian packaging) branch, master, updated. upstream/2.1.7-214-g6762190
Damyan Ivanov
dmn at debian.org
Wed Apr 1 14:45:33 UTC 2009
The following commit has been merged in the master branch:
commit 9984173cdccf6016967f27eee608d7ae354e9bfc
Author: Damyan Ivanov <dmn at debian.org>
Date: Wed Apr 1 11:48:38 2009 +0300
Imported Upstream version 2.1.9
diff --git a/CHANGES b/CHANGES
index 74a42fd..450b7e5 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,18 +1,3 @@
-Version 2.1.10
-
- User-visible changes:
- * Change closer to cclive output
- - Paving the way for abby (front-end)
- * Add Cctv support
- - Thanks to suntong001 for data mining
- * Add --hosts for printing supported hosts
- * Change --version output
- * Add --stderr option
-
- Developer-visible changes:
- * Replace if-elsif $host block with a loop
-
-
Version 2.1.9
(March 27, 2009)
diff --git a/clive b/clive
index c42080c..de44523 100755
--- a/clive
+++ b/clive
@@ -21,7 +21,7 @@
use warnings;
use strict;
-use constant VERSION => "2.1.10";
+use constant VERSION => "2.1.9";
use constant MBDIV => 0x100000;
use constant SHOWFMT_DEFAULT => qq/%D: "%t" | %mMB/;
@@ -85,21 +85,8 @@ my %re_hosts = ( # Precompiled regex used to identify the host
IsLiveleak => qr|liveleak.com|i,
IsEvisor => qr|evisor.tv|i,
IsDmotion => qr|dailymotion.com|i,
- IsCctv => qr|tv.cctv.com|i,
);
-my @re_hosts_arr = (
- [$re_hosts{IsYoutube}, \&handle_youtube],
- [$re_hosts{IsGoogle}, \&handle_google],
- [$re_hosts{IsSevenload}, \&handle_sevenload],
- [$re_hosts{IsBreak}, \&handle_break],
- [$re_hosts{IsLastfm}, \&handle_lastfm],
- [$re_hosts{IsLiveleak}, \&handle_liveleak],
- [$re_hosts{IsEvisor}, \&handle_evisor],
- [$re_hosts{IsDmotion}, \&handle_dmotion],
- [$re_hosts{IsCctv}, \&handle_cctv],
- );
-
# Parse config
my $c = Config::Tiny->read($CONFIGFILE);
%opts = (
@@ -135,8 +122,7 @@ GetOptions(
'output|o=s', 'append|a=s', 'background|b', 'quiet|q',
'grep|g=s', 'agent|U=s', 'proxy|y=s', 'savedir|S=s',
'cclass|l=s', 'exec|x=s', 'progress|G=s', 'clivepass|V=s',
- 'stream=i', 'stderr',
- 'hosts' => \&print_hosts,
+ 'stream=i',
'version|v' => \&print_version,
# Commented out until WWW::Curl is fixed:
@@ -343,29 +329,44 @@ sub process_page
{
my ($url, $response_ref, $response_fh) = @_;
- #$$response_ref =~ tr{\n}//d;
+ print "done.\nprocess page ..."
+ unless $opts{quiet};
+
+ $$response_ref =~ tr{\n}//d;
my $p = HTML::TokeParser->new($response_ref);
$p->get_tag("title");
my $title = $p->get_trimmed_text;
- my ($xurl, $id, $_title, $supported);
- $supported = 0;
- foreach (@re_hosts_arr)
+ my ($xurl, $id);
+ if ($url =~ /$re_hosts{IsYoutube}/)
{
- my ($re, $handler) = @{$_};
- if ($url =~ /$re/)
- {
- $supported = 1;
- ($xurl, $id, $_title) =
- &$handler($response_ref, $response_fh, $url);
- $title = $_title || $title;
- last;
- }
+ ($xurl, $id) = handle_youtube($response_ref);
+ }
+ elsif ($url =~ /$re_hosts{IsGoogle}/)
+ {
+ ($xurl, $id) = handle_google($response_ref);
+ }
+ elsif ($url =~ /$re_hosts{IsSevenload}/)
+ {
+ ($xurl, $id) = handle_sevenload($response_ref, $response_fh);
+ }
+ elsif ($url =~ /$re_hosts{IsBreak}/)
+ {
+ ($xurl, $id, $title) = handle_break($response_ref);
+ }
+ elsif ($url =~ /$re_hosts{IsLiveleak}/)
+ {
+ ($xurl, $id) = handle_liveleak($response_ref, $response_fh);
+ }
+ elsif ($url =~ /$re_hosts{IsEvisor}/)
+ {
+ ($xurl, $id) = handle_evisor($response_ref);
+ }
+ elsif ($url =~ /$re_hosts{IsDmotion}/)
+ {
+ ($xurl, $id) = handle_dmotion($response_ref);
}
- die "error: lookup array missing handler; should never get here\n"
- if !$supported;
-
return -1
if !$xurl
or !$id
@@ -388,7 +389,7 @@ sub query_video_length
unless ($entry{file_length})
{
- print "done.\nverify video link ..."
+ print "done.\nquery length ..."
unless $opts{quiet};
$curl->setopt(CURLOPT_URL, $entry{xurl});
@@ -426,9 +427,10 @@ sub query_video_length
{
if ($opts{format} eq "flv")
{
- if ( $entry{page_url} =~ /$re_hosts{IsEvisor}/
- or $entry{page_url} =~ /$re_hosts{IsCctv}/)
+ if ($entry{page_url} =~ /$re_hosts{IsEvisor}/)
{
+
+ #or $entry{page_url} =~ /$re_hosts{IsMetacafe}/ ) {
$entry{file_suffix} = "flv";
$content_ok = 1;
}
@@ -527,14 +529,17 @@ sub extract_video
unless ($opts{quiet})
{
- print "file: $fn";
+ print "file: $fn\n";
+ print "length: $entry{file_length} ";
- if ($cont_from)
- {
- printf("\nfrom: $cont_from (%.1fMB) "
- . "remaining: $remaining (%.1fMB)",
- $cont_from / MBDIV, $remaining / MBDIV);
- }
+ printf "(%.2fMB) ", $entry{file_length} / MBDIV
+ if $entry{file_length};
+
+ printf "from: %u (left: %u) ", $cont_from, $remaining
+ if $cont_from;
+
+ printf "[$content_type]"
+ if $content_type;
print "\n";
}
@@ -567,8 +572,7 @@ sub extract_video
# Use 'dot' progress if the output is not a TTY
if ( $opts{progress} !~ /^dot/
- and $opts{progress} ne 'none'
- and !$opts{stderr})
+ and $opts{progress} ne 'none')
{
$opts{progress} = 'dot'
if !-t STDOUT or !-t STDERR;
@@ -624,8 +628,9 @@ sub extract_video
if (!$errmsg)
{
- print "\n"
+ print "\nclosed http/$rc.\n"
unless $opts{quiet};
+
push @exec_files, $path
if $opts{exec};
}
@@ -747,7 +752,7 @@ sub parse_input
sub handle_youtube
{
- my ($response_ref, $xurl) = @_;
+ my ($response_ref, $xurl) = (shift);
my %re = (
GrabID => qr/"video_id": "(.*?)"/,
@@ -780,7 +785,7 @@ sub handle_youtube
sub handle_google
{
- my ($response_ref) = @_;
+ my $response_ref = shift;
my %re = (
@@ -789,7 +794,7 @@ sub handle_google
GrabMP4 => qr|href="http://vp\.(.*?)"|,
);
- my $id = $1 if $$response_ref =~ /$re{GrabID}/;
+ my $id = $1 if $$response_ref =~ /$re{GrabID}/;
my $xurl = URI::Escape::uri_unescape($1)
if $$response_ref =~ /$re{GrabVideoURL}/;
@@ -835,7 +840,7 @@ sub handle_sevenload
sub handle_break
{
- my ($response_ref) = @_;
+ my $response_ref = shift;
my %re = (
GrabTitle => qr|id="vid_title" content="(.*?)"|,
@@ -913,7 +918,7 @@ sub handle_liveleak
sub handle_evisor
{
- my ($respr) = @_;
+ my $respr = shift;
my %re = (
GrabXurl => qr|file=(.*?)"|,
@@ -942,7 +947,7 @@ sub handle_evisor
sub handle_dmotion
{
- my ($resp) = @_;
+ my $resp = shift;
my %re = (
GrabID => qr|swf%2F(.*?)"|,
@@ -985,29 +990,6 @@ sub handle_dmotion
return ($xurl, $id);
}
-sub handle_cctv
-{
- my ($resp, $resp_fh, $page_url) = @_;
- my $re = qr|videoId=(.*?)&|;
-
- my ($id, $xurl);
- $id = $1 if $$resp =~ /$re/;
-
- if ($id)
- {
- my $domain = join('.', strdomain($page_url));
- my $conf_url =
- "http://$domain/playcfg/flv_info_new.jsp?videoId=$id";
- $xurl = fetch_cctv_space_config($conf_url, $resp_fh);
- }
- else
- {
- print STDERR "\nerror: id not found\n";
- }
-
- return ($xurl, $id);
-}
-
# Subroutines: Progress
# NOTE: the 'dot' progress copies much from wget.
@@ -1363,22 +1345,11 @@ sub main
select STDOUT;
$| = 1;
- if ($opts{background})
- {
- daemonize();
- }
- else
- {
- if ($opts{stderr})
- {
-
- # redirect stdout to stderr
- open STDOUT, ">&STDERR"
- or die "error: cannot dup STDOUT: $!";
- }
- }
+ daemonize()
+ if $opts{background};
process_queue();
+
free_cache();
}
@@ -1638,82 +1609,28 @@ sub fetch_sevenload_configxml
return ($xurl, $id);
}
-sub fetch_cctv_space_config
-{
- my ($conf_url, $resp_fh) = @_;
-
- print "done.\nfetch config file ..."
- unless $opts{quiet};
-
- my $conf = "";
- open my $fh, ">", \$conf;
-
- # Disable: header
- $curl->setopt(CURLOPT_HEADER, 0);
- $curl->setopt(CURLOPT_URL, $conf_url);
- $curl->setopt(CURLOPT_WRITEDATA, $fh);
-
- my $rc = $curl->perform;
- close $fh;
-
- my ($xurl, $errmsg);
- if ($rc == 0)
- {
- my $re = qr|"url":"(.*?)"|;
- if ($conf =~ /$re/)
- {
- $xurl = "http://v.cctv.com/flash/$1";
- }
- else
- {
- $errmsg = "extraction url not found";
- }
- }
- else
- {
- $errmsg = $curl->strerror($rc) . " http/$rc\n";
- }
-
- print STDERR "\nerror: $errmsg\n" if $errmsg;
-
- # Re-enable: header, reset WRITEDATA, the above overrides the
- # original settings.
- $curl->setopt(CURLOPT_HEADER, 0);
- $curl->setopt(CURLOPT_WRITEDATA, $resp_fh);
-
- return $xurl;
-}
-
-sub strdomain
-{
- my $uri = shift;
-
- my ($scheme, $authority, $path, $query, $fragment) = $uri =~
- m{(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?}o;
-
- # Extract the domain from the URL.
- my @a = split(/\./, $authority);
-
- return @a;
-}
-
sub title_to_filename
{
my $title = shift;
- $title =~
- s/(youtube|video|liveleak.com|sevenload|dailymotion|cctv.com)//gi;
+ $title =~ s/(youtube|video|liveleak.com|sevenload|dailymotion)//gi;
$title =~ s/^\s+//;
$title =~ s/\s+$//;
my $r = $opts{cclass} || qr|\w|;
$title = join('', $title =~ /$r/g);
+ # Courtesy of:
+ # http://search.cpan.org/~gaas/URI-1.37/URI.pm#PARSING_URIs_WITH_REGEXP
+ my ($scheme, $authority, $path, $query, $fragment) =
+ m{(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?}o;
+
+ # Extract the domain from the URL.
+ my @a = split(/\./, $authority);
+
my $fn = $opts{fnfmt} || "%t_%d_%i.%s";
my $timestamp = POSIX::strftime("%F %T", localtime);
- my @a = strdomain($entry{page_url});
-
my %h = (
"%t" => $title,
"%s" => $entry{file_suffix},
@@ -1972,21 +1889,15 @@ sub emit
if $opts{emitxml} and @emit_queue;
}
-sub print_hosts
-{
- print "$re_hosts{$_}\n" foreach (keys %re_hosts);
- exit;
-}
-
sub print_version
{
my $perl_v = sprintf("--with-perl=%vd-%s", $^V, $^O);
- my $str =
- sprintf("clive version %s with WWW::Curl version "
- . "$WWW::Curl::VERSION [%s].\n"
+ my $str = sprintf(
+ "clive %s. "
. "Copyright (c) 2007-2009 Toni Gundogdu "
- . "<legatvs\@gmail.com>.\n\n",
- VERSION, $^O);
+ . "<legatvs\@gmail.com>.\n",
+ VERSION
+ );
$str .= "$perl_v ";
my $i = 0;
while (my ($key, $value) = each(%opted_mods))
@@ -1995,9 +1906,9 @@ sub print_version
$str .= "\n" if (++$i % 2 == 0);
}
$str .=
- "\nclive is licensed under the ISC license which is functionally\n"
+ "clive is licensed under the ISC license which is functionally\n"
. "equivalent to the 2-clause BSD licence.\n"
- . "\tReport bugs: <http://code.google.com/p/clive/issues/>\n";
+ . "\tReport bugs to <http://code.google.com/p/clive/issues/>.\n";
print "$str";
exit;
}
@@ -2012,7 +1923,6 @@ clive [options]... [URL]...
-h, --help print help and exit
-v, --version print version and exit
- --hosts print supported hosts and exit
-b, --background go to background after startup
-e, --emit-csv emit video details as csv to stdout
-E, --emit-xml emit video details as csv to stdout
@@ -2037,7 +1947,6 @@ Logging and Input Options:
-r, --recall recall last url batch
-T, --savebatch=FILE save url batch to FILE
-p, --paste paste input from clipboard
- --stderr redirect all output to stderr even when no tty
Download Options:
-O, --output-video=FNAME write video to file
-n, --no-extract do not extract any videos
diff --git a/clive.1 b/clive.1
index 1e0dc65..879430c 100644
--- a/clive.1
+++ b/clive.1
@@ -1,4 +1,4 @@
-.\" Automatically generated by Pod::Man 2.16 (Pod::Simple 3.05)
+.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.35
.\"
.\" Standard preamble:
.\" ========================================================================
@@ -25,11 +25,11 @@
..
.\" Set up some character translations and predefined strings. \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
-.\" double quote, and \*(R" will give a right double quote. \*(C+ will
-.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
-.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
-.\" nothing in troff, for use with C<>.
-.tr \(*W-
+.\" double quote, and \*(R" will give a right double quote. | will give a
+.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
+.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
+.\" expand to `' in nroff, nothing in troff, for use with C<>.
+.tr \(*W-|\(bv\*(Tr
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
. ds -- \(*W-
@@ -48,25 +48,22 @@
. ds R" ''
'br\}
.\"
-.\" Escape single quotes in literal strings from groff's Unicode transform.
-.ie \n(.g .ds Aq \(aq
-.el .ds Aq '
-.\"
.\" If the F register is turned on, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
.\" entries marked with X<> in POD. Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
-.ie \nF \{\
+.if \nF \{\
. de IX
. tm Index:\\$1\t\\n%\t"\\$2"
..
. nr % 0
. rr F
.\}
-.el \{\
-. de IX
-..
-.\}
+.\"
+.\" For nroff, turn off justification. Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.hy 0
+.if n .na
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear. Run. Save yourself. No user-serviceable parts.
@@ -132,11 +129,7 @@
.\" ========================================================================
.\"
.IX Title "clive 1"
-.TH clive 1 "2009-03-30" "2.1.10" "clive manual"
-.\" For nroff, turn off justification. Always turn off hyphenation; it makes
-.\" way too many mistakes in technical documents.
-.if n .ad l
-.nh
+.TH clive 1 "2009-03-22" "2.1.8" "clive manual"
.SH "NAME"
clive \- the non\-interactive video extraction utility
.SH "SYNOPSIS"
@@ -148,7 +141,7 @@ clive is a command line utility for extracting videos from Youtube and other
video sharing Web sites. It was originally written to bypass the Adobe Flash
requirement needed to view the hosted videos.
.PP
-clive can be non-interactive. This means it can work in the background
+clive can be non\-interactive. This means it can work in the background
while the user is not logged on, allowing the user to start an extraction
and disconnect from the system and letting clive finish the work.
.PP
@@ -174,9 +167,6 @@ Print this help and exit.
.IP "\fB\-v \-\-version\fR" 4
.IX Item "-v --version"
Print version and exit.
-.IP "\fB\-\-hosts\fR" 4
-.IX Item "--hosts"
-Print supported hosts and exit.
.IP "\fB\-b \-\-background\fR" 4
.IX Item "-b --background"
Go to background immediately after startup. If no output file is specified
@@ -270,14 +260,11 @@ Recall the last \s-1URL\s0 batch from the previous runtime. Note that clive over
this information at every runtime. See also \*(L"\s-1CACHE\s0\*(R".
.IP "\fB\-T \-\-savebatch=\fR\fIfile\fR" 4
.IX Item "-T --savebatch=file"
-Save current \s-1URL\s0 batch to \fIfile\fR.
+Save current \s-1URL\s0 batch to \fIfile\fR.
.IP "\fB\-p \-\-paste\fR" 4
.IX Item "-p --paste"
Paste input from clipboard. The pasted URLs are expected to be separated with
newlines.
-.IP "\fB\-\-stderr\fR" 4
-.IX Item "--stderr"
-Redirect all input to stderr, even if no \s-1TTY\s0 is available.
.PP
\&\fBDownload Options\fR
.IP "\fB\-O \-\-output\-video=\fR\fIfile\fR" 4
@@ -326,7 +313,7 @@ Progress indication can be disabled completely by setting \fItype\fR to \*(L"non
.IP "\fB\-u \-\-youtube\-user=\fR\fIusername\fR" 4
.IX Item "-u --youtube-user=username"
\&\fIusername\fR to log into Youtube. Typically needed only if you are planning
-to extract videos flagged as mature-content.
+to extract videos flagged as mature\-content.
.IP "\fB\-t \-\-youtube\-pass=\fR\fIpassword\fR" 4
.IX Item "-t --youtube-pass=password"
\&\fIpassword\fR to log into Youtube. If \fIpassword\fR is \*(L"\-\*(R", causes clive to
@@ -513,14 +500,11 @@ Formats: (spark|spak\-mini|vp6\-hq|vp6\-hd|vp6|h264) Continue: Yes
.Sp
spark is the \*(L"regular\*(R" 320x240 flv. \fBclive\fR defaults to spark
if requested format is not available.
-.IP "\fBtv.cctv.com\fR" 4
-.IX Item "tv.cctv.com"
-Formats: flv Continue: Yes
.SH "CACHE"
.IX Header "CACHE"
The cache has two purposes:
.IP "1." 4
-Keep reusable video data available for fast re-extraction.
+Keep reusable video data available for fast re\-extraction.
.IP "2." 4
Keep a record of extracted videos.
.PP
@@ -579,7 +563,7 @@ Development repository can be cloned with:
For release announcements, subscribe to the project at:
<http://freshmeat.net/projects/clive/>
.PP
-Additional utilities (clive-utils):
+Additional utilities (clive\-utils):
<http://code.google.com/p/clive\-utils/>
.SH "AUTHOR"
.IX Header "AUTHOR"
diff --git a/clive.pod b/clive.pod
index 7949eec..fc44c74 100644
--- a/clive.pod
+++ b/clive.pod
@@ -44,10 +44,6 @@ Print this help and exit.
Print version and exit.
-=item B<--hosts>
-
-Print supported hosts and exit.
-
=item B<-b --background>
Go to background immediately after startup. If no output file is specified
@@ -179,10 +175,6 @@ Save current URL batch to I<file>.
Paste input from clipboard. The pasted URLs are expected to be separated with
newlines.
-=item B<--stderr>
-
-Redirect all input to stderr, even if no TTY is available.
-
=back
B<Download Options>
@@ -480,10 +472,6 @@ Formats: (spark|spak-mini|vp6-hq|vp6-hd|vp6|h264) Continue: Yes
spark is the "regular" 320x240 flv. B<clive> defaults to spark
if requested format is not available.
-=item B<tv.cctv.com>
-
-Formats: flv Continue: Yes
-
=back
=head1 CACHE
diff --git a/tests/cctv.url b/tests/cctv.url
deleted file mode 100644
index 30ee3e0..0000000
--- a/tests/cctv.url
+++ /dev/null
@@ -1 +0,0 @@
-http://space.tv.cctv.com/video/VIDE1212909276513233
--
Video extraction utility for YouTube, Google Video and other video sites (Debian packaging)
More information about the Pkg-perl-cvs-commits
mailing list