r28164 - in /branches/upstream/libspreadsheet-xlsx-perl/current: Changes META.yml Makefile.PL lib/Spreadsheet/XLSX.pm
gregoa at users.alioth.debian.org
gregoa at users.alioth.debian.org
Sat Dec 13 02:11:50 UTC 2008
Author: gregoa
Date: Sat Dec 13 02:11:48 2008
New Revision: 28164
URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=28164
[svn-upgrade] Integrating new upstream version, libspreadsheet-xlsx-perl (0.05)
Modified: branches/upstream/libspreadsheet-xlsx-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libspreadsheet-xlsx-perl/current/Changes?rev=28164&op=diff
--- branches/upstream/libspreadsheet-xlsx-perl/current/Changes (original)
+++ branches/upstream/libspreadsheet-xlsx-perl/current/Changes Sat Dec 13 02:11:48 2008
@@ -17,4 +17,14 @@
0.04 Fri Nov 14 09:25:47 MSK 2008
- - RE fixed for the case of opening <t ...> with attributes (thanks Loreyna Yeung)
+ - RE fixed for the case of opening <t ...> with attributes (thanks Loreyna Yeung)
+0.05 Fri Dec 12 17:28:23 MSK 2008
+ - a lot of fixes by Rob Polocz (dependency on Spreadsheet::ParseExcel introduced):
+ -- Added support for styles and formatted strings;
+ -- create and use ParseExcel Workbook, Spreadsheet, and Cell objects;
+ -- 1904 date convention support;
+ -- empty tag support;
+ -- received permission from the Spreadsheet::ParseExcel guys to leverage the formatting classes and check them in to this project.
Modified: branches/upstream/libspreadsheet-xlsx-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libspreadsheet-xlsx-perl/current/META.yml?rev=28164&op=diff
--- branches/upstream/libspreadsheet-xlsx-perl/current/META.yml (original)
+++ branches/upstream/libspreadsheet-xlsx-perl/current/META.yml Sat Dec 13 02:11:48 2008
@@ -1,6 +1,6 @@
--- #YAML:1.0
name: Spreadsheet-XLSX
-version: 0.04
+version: 0.05
abstract: Perl extension for reading MS Excel 2007 files;
license: ~
@@ -9,6 +9,7 @@
distribution_type: module
Archive::Zip: 1.18
+ Spreadsheet::ParseExcel:
url: http://module-build.sourceforge.net/META-spec-v1.3.html
version: 1.3
Modified: branches/upstream/libspreadsheet-xlsx-perl/current/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libspreadsheet-xlsx-perl/current/Makefile.PL?rev=28164&op=diff
--- branches/upstream/libspreadsheet-xlsx-perl/current/Makefile.PL (original)
+++ branches/upstream/libspreadsheet-xlsx-perl/current/Makefile.PL Sat Dec 13 02:11:48 2008
@@ -7,6 +7,7 @@
VERSION_FROM => 'lib/Spreadsheet/XLSX.pm', # finds $VERSION
'Archive::Zip' => 1.18,
+ 'Spreadsheet::ParseExcel' => '',
}, # e.g., Module::Name => 1.1
($] >= 5.005 ? ## Add these new keywords supported since 5.005
(ABSTRACT_FROM => 'lib/Spreadsheet/XLSX.pm', # retrieve abstract from module
Modified: branches/upstream/libspreadsheet-xlsx-perl/current/lib/Spreadsheet/XLSX.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libspreadsheet-xlsx-perl/current/lib/Spreadsheet/XLSX.pm?rev=28164&op=diff
--- branches/upstream/libspreadsheet-xlsx-perl/current/lib/Spreadsheet/XLSX.pm (original)
+++ branches/upstream/libspreadsheet-xlsx-perl/current/lib/Spreadsheet/XLSX.pm Sat Dec 13 02:11:48 2008
@@ -1,15 +1,17 @@
package Spreadsheet::XLSX;
+use 5.008008;
use strict;
use warnings;
our @ISA = qw();
-our $VERSION = '0.04';
+our $VERSION = '0.05';
use Archive::Zip;
+use Spreadsheet::XLSX::Fmt2007;
use Data::Dumper;
+use Spreadsheet::ParseExcel;
@@ -27,8 +29,11 @@
if ($member_shared_strings) {
- foreach my $t ($member_shared_strings -> contents =~ /\>([^\<]*)\<\/t/gsm) {
+ my $mstr = $member_shared_strings->contents;
+ $mstr =~ s/<t\/>/<t><\/t>/gsm; # this handles an empty t tag in the xml <t/>
+ #foreach my $t ($member_shared_strings -> contents =~ /t\>([^\<]*)\<\/t/gsm) {
+ foreach my $t ($mstr =~ /<t.*?>(.*?)<\/t/gsm) {
$t = $converter -> convert ($t) if $converter;
push @shared_strings, $t;
@@ -36,9 +41,47 @@
+ my $member_styles = $self -> {zip} -> memberNamed ('xl/styles.xml');
+ my @styles = ();
+ my %style_info = ();
+ if ($member_styles) {
+ foreach my $t ($member_styles -> contents =~ /xf\ numFmtId="([^"]*)"(?!.*\/cellStyleXfs)/gsm) { #"
+ # $t = $converter -> convert ($t) if $converter;
+ push @styles, $t;
+ }
+ my $default = $1;
+ foreach my $t1 (@styles){
+ $member_styles -> contents =~ /numFmtId="$t1" formatCode="([^"]*)/;
+ my $formatCode=$1;
+ if ($formatCode eq $default || not($formatCode)){
+ if ($t1 == 9 || $t1==10){ $formatCode="0.00000%";}
+ elsif ($t1 == 14){ $formatCode="m-d-yy";}
+ else {
+ $formatCode="";
+ }
+ }
+ $style_info{$t1} = $formatCode;
+ $default=$1;
+ }
+ }
my $member_workbook = $self -> {zip} -> memberNamed ('xl/workbook.xml') or die ("xl/workbook.xml not found in this zip\n");
+ my $oBook = Spreadsheet::ParseExcel::Workbook->new;
+ $oBook->{SheetCount} = 0;
+ $oBook->{FmtClass} = Spreadsheet::XLSX::Fmt2007->new;
+ $oBook->{Flg1904}=0;
+ if ($member_workbook->contents =~ /date1904="1"/){
+ $oBook->{Flg1904}=1;
+ }
my @Worksheet = ();
foreach ($member_workbook -> contents =~ /\<(.*?)\/?\>/g) {
@@ -60,7 +103,7 @@
foreach ($other =~ /(\S+=".*?")/gsm) {
- my ($k, $v) = split /=?"/;
+ my ($k, $v) = split /=?"/; #"
if ($k eq 'name') {
$sheet -> {Name} = $v;
@@ -71,8 +114,10 @@
- push @Worksheet, $sheet;
+ my $wsheet = Spreadsheet::ParseExcel::Worksheet->new(%$sheet);
+ push @Worksheet, $wsheet;
+ $oBook->{Worksheet}[$oBook->{SheetCount}] = $wsheet;
+ $oBook->{SheetCount}+=1;
@@ -88,23 +133,26 @@
my $flag = 0;
my $s = 0;
+ my $s2 = 0;
+ my $sty = 0;
foreach ($member_sheet -> contents =~ /(\<.*?\/?\>|.*?(?=\<))/g) {
if (/^\<c r=\"([A-Z])([A-Z]?)(\d+)\"/) {
$col = ord ($1) - 65;
if ($2) {
- $col++;
+ $col++;
$col *= 26;
$col += (ord ($2) - 65);
$row = $3 - 1;
- $s = /t=\"s\"/ ? 1 : 0;
+ $s = /t=\"s\"/ ? 1 : 0;
+ $s2 = /t=\"str\"/ ? 1 : 0;
+ /s="([^"]*)"/; #"
+ $sty = $1>0 ? $1 : 0 ;
elsif (/^<v/) {
$flag = 1;
@@ -113,21 +161,38 @@
$flag = 0;
elsif (length ($_) && $flag) {
my $v = $s ? $shared_strings [$_] : $_;
+ if ($v eq "</c>"){$v="";}
+ my $type = "Text";
+ my $thisstyle = "";
+ if (not($s) && not($s2)){
+ $type="Numeric";
+ $thisstyle = $style_info{$styles[$sty]};
+ if ($thisstyle =~ /(?<!Re)d|m|y/){
+ $type="Date";
+ }
+ }
$sheet -> {MaxRow} = $row if $sheet -> {MaxRow} < $row;
$sheet -> {MaxCol} = $col if $sheet -> {MaxCol} < $col;
$sheet -> {MinRow} = $row if $sheet -> {MinRow} > $row;
$sheet -> {MinCol} = $col if $sheet -> {MinCol} > $col;
- $sheet -> {Cells} [$row] [$col] = {
+ if ($v =~ /(.*)E\-(.*)/gsm && $type eq "Numeric"){
+ $v=$1/(10**$2); # this handles scientific notation for very small numbers
+ }
+ my $cell =Spreadsheet::ParseExcel::Cell->new(
Val => $v,
- _Value => $v,
+ Format => $thisstyle,
+ Type => $type
- };
+ );
+ $cell->{_Value} = $oBook->{FmtClass}->ValFmt($cell, $oBook);
+ if ($type eq "Date" && $v<1){ #then this is Excel time field
+ $cell->{Type}="Text";
+ $cell->{Val}=$cell->{_Value};
+ }
+ $sheet -> {Cells} [$row] [$col] = $cell;
@@ -136,10 +201,11 @@
$sheet -> {MinCol} = 0 if $sheet -> {MinCol} > $sheet -> {MaxCol};
+foreach my $stys (keys %style_info){
bless ($self, $class);
- return $self;
+ return $oBook;
@@ -189,7 +255,9 @@
This module is a (quick and dirty) emulation of Spreadsheet::ParseExcel for
-Excel 2007 (.xlsx) file format.
+Excel 2007 (.xlsx) file format. It supports styles and many of Excel's quirks,
+but not all. It populates the classes from Spreadsheet::ParseExcel for interoperability;
+including Workbook, Worksheet, and Cell.
=head1 SEE ALSO
@@ -246,7 +314,8 @@
Steve Simms
Joerg Meltzer
- Loreyna Yeung
+ Loreyna Yeung
+ Rob Polocz
More information about the Pkg-perl-cvs-commits mailing list