[SCM] Video extraction utility for YouTube, Google Video and other video sites (Debian packaging) branch, master, updated. upstream/2.1.7-214-g6762190

Wed Apr 1 14:45:26 UTC 2009

The following commit has been merged in the master branch:
commit 262d650be0eb06b977fe93ccc288ef3600068780
Author: legatvs <legatvs at gmail.com>
Date:   Sun Mar 29 15:23:56 2009 +0300

    Add CCtv support.

diff --git a/clive b/clive
index de44523..fa0c5e2 100755
--- a/clive
+++ b/clive
@@ -85,6 +85,7 @@ my %re_hosts = (        # Precompiled regex used to identify the host
                  IsLiveleak  => qr|liveleak.com|i,
                  IsEvisor    => qr|evisor.tv|i,
                  IsDmotion   => qr|dailymotion.com|i,
+                 IsCctv      => qr|tv.cctv.com|i,
                );
 
 # Parse config
@@ -332,7 +333,7 @@ sub process_page
     print "done.\nprocess page ..."
       unless $opts{quiet};
 
-    $$response_ref =~ tr{\n}//d;
+    #$$response_ref =~ tr{\n}//d;
 
     my $p = HTML::TokeParser->new($response_ref);
     $p->get_tag("title");
@@ -367,6 +368,10 @@ sub process_page
     {
         ($xurl, $id) = handle_dmotion($response_ref);
     }
+    elsif ($url =~ /$re_hosts{IsCctv}/)
+    {
+        ($xurl, $id) = handle_cctv_space($response_ref, $response_fh, $url);
+    }
     return -1
       if !$xurl
           or !$id
@@ -427,10 +432,9 @@ sub query_video_length
             {
                 if ($opts{format} eq "flv")
                 {
-                    if ($entry{page_url} =~ /$re_hosts{IsEvisor}/)
+                    if (   $entry{page_url} =~ /$re_hosts{IsEvisor}/
+                        or $entry{page_url} =~ /$re_hosts{IsCctv}/)
                     {
-
-                        #or $entry{page_url} =~ /$re_hosts{IsMetacafe}/ ) {
                         $entry{file_suffix} = "flv";
                         $content_ok = 1;
                     }
@@ -794,7 +798,7 @@ sub handle_google
         GrabMP4      => qr|href="http://vp\.(.*?)"|,
     );
 
-    my $id  = $1 if $$response_ref =~ /$re{GrabID}/;
+    my $id = $1 if $$response_ref =~ /$re{GrabID}/;
 
     my $xurl = URI::Escape::uri_unescape($1)
       if $$response_ref =~ /$re{GrabVideoURL}/;
@@ -990,6 +994,75 @@ sub handle_dmotion
     return ($xurl, $id);
 }
 
+sub handle_cctv_space
+{
+    my ($resp, $resp_fh, $page_url) = @_;
+    my $re = qr|videoId=(.*?)&|;
+
+    my ($id, $xurl);
+    $id = $1 if $$resp =~ /$re/;
+
+    if ($id)
+    {
+        my $domain = join('.',strdomain($page_url));
+        my $conf_url =
+          "http://$domain/playcfg/flv_info_new.jsp?videoId=$id";
+        $xurl = fetch_cctv_space_config($conf_url, $resp_fh);
+    }
+    else
+    {
+        print STDERR "\nerror: id not found\n";
+    }
+
+    return ($xurl, $id);
+}
+
+sub fetch_cctv_space_config
+{
+    my ($conf_url, $resp_fh) = @_;
+
+    print "done.\nfetch config file ..."
+      unless $opts{quiet};
+
+    my $conf = "";
+    open my $fh, ">", \$conf;
+
+    # Disable: header
+    $curl->setopt(CURLOPT_HEADER,    0);
+    $curl->setopt(CURLOPT_URL,       $conf_url);
+    $curl->setopt(CURLOPT_WRITEDATA, $fh);
+
+    my $rc = $curl->perform;
+    close $fh;
+
+    my ($xurl, $errmsg);
+    if ($rc == 0)
+    {
+        my $re = qr|"url":"(.*?)"|;
+        if ($conf =~ /$re/)
+        {
+            $xurl = "http://v.cctv.com/flash/$1";
+        }
+        else
+        {
+            $errmsg = "extraction url not found";
+        }
+    }
+    else
+    {
+        $errmsg = $curl->strerror($rc) . " http/$rc\n";
+    }
+
+    print STDERR "\nerror: $errmsg\n" if $errmsg;
+
+    # Re-enable: header, reset WRITEDATA, the above overrides the
+    # original settings.
+    $curl->setopt(CURLOPT_HEADER,    0);
+    $curl->setopt(CURLOPT_WRITEDATA, $resp_fh);
+
+    return $xurl;
+}
+
 # Subroutines: Progress
 # NOTE: the 'dot' progress copies much from wget.
 
@@ -1609,28 +1682,36 @@ sub fetch_sevenload_configxml
     return ($xurl, $id);
 }
 
+sub strdomain
+{
+    my $uri = shift;
+
+    my ($scheme, $authority, $path, $query, $fragment) = $uri =~
+      m{(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?}o;
+
+    # Extract the domain from the URL.
+    my @a = split(/\./, $authority);
+
+    return @a;
+}
+
 sub title_to_filename
 {
     my $title = shift;
 
-    $title =~ s/(youtube|video|liveleak.com|sevenload|dailymotion)//gi;
+    $title =~
+      s/(youtube|video|liveleak.com|sevenload|dailymotion|cctv.com)//gi;
     $title =~ s/^\s+//;
     $title =~ s/\s+$//;
 
     my $r = $opts{cclass} || qr|\w|;
     $title = join('', $title =~ /$r/g);
 
-    # Courtesy of:
-    #   http://search.cpan.org/~gaas/URI-1.37/URI.pm#PARSING_URIs_WITH_REGEXP
-    my ($scheme, $authority, $path, $query, $fragment) =
-      m{(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?}o;
-
-    # Extract the domain from the URL.
-    my @a = split(/\./, $authority);
-
     my $fn = $opts{fnfmt} || "%t_%d_%i.%s";
     my $timestamp = POSIX::strftime("%F %T", localtime);
 
+    my @a = strdomain($entry{page_url});
+
     my %h = (
              "%t" => $title,
              "%s" => $entry{file_suffix},
diff --git a/tests/cctv.url b/tests/cctv.url
new file mode 100644
index 0000000..30ee3e0
--- /dev/null
+++ b/tests/cctv.url
@@ -0,0 +1 @@
+http://space.tv.cctv.com/video/VIDE1212909276513233

-- 
Video extraction utility for YouTube, Google Video and other video sites (Debian packaging)