94 lines
3.8 KiB
Plaintext
|
xmlformat is distributed under a BSD-style license. This license
|
||
|
applies to the entire xmlformat distribution, with the exception of
|
||
|
the REX parser (described below).
|
||
|
|
||
|
Copyright (c) 2004, 2005, Kitebird, LLC. All rights reserved.
|
||
|
|
||
|
Redistribution and use in source and binary forms, with or without
|
||
|
modification, are permitted provided that the following conditions
|
||
|
are met:
|
||
|
|
||
|
1. Redistributions of source code must retain the above copyright
|
||
|
notice, this list of conditions and the following disclaimer.
|
||
|
|
||
|
2. Redistributions in binary form must reproduce the above copyright
|
||
|
notice, this list of conditions and the following disclaimer in the
|
||
|
documentation and/or other materials provided with the distribution.
|
||
|
|
||
|
3. Neither the name of Kitebird nor the names of its contributors may
|
||
|
be used to endorse or promote products derived from this software
|
||
|
without specific prior written permission.
|
||
|
|
||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
|
POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
||
|
----------------------------------------------------------------------
|
||
|
The REX parser
|
||
|
|
||
|
xmlformat contains code based on the REX parser, which is Copyright (c) 1998,
|
||
|
Robert D. Cameron. REX is described in this document:
|
||
|
|
||
|
http://www.cs.sfu.ca/~cameron/REX.html
|
||
|
|
||
|
The document contains a Perl implementation of REX:
|
||
|
|
||
|
--- begin REX code ---
|
||
|
# REX/Perl 1.0
|
||
|
# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
|
||
|
# Technical Report TR 1998-17, School of Computing Science, Simon Fraser
|
||
|
# University, November, 1998.
|
||
|
# Copyright (c) 1998, Robert D. Cameron.
|
||
|
# The following code may be freely used and distributed provided that
|
||
|
# this copyright and citation notice remains intact and that modifications
|
||
|
# or additions are clearly identified.
|
||
|
|
||
|
$TextSE = "[^<]+";
|
||
|
$UntilHyphen = "[^-]*-";
|
||
|
$Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";
|
||
|
$CommentCE = "$Until2Hyphens>?";
|
||
|
$UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+";
|
||
|
$CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";
|
||
|
$S = "[ \\n\\t\\r]+";
|
||
|
$NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
|
||
|
$NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
|
||
|
$Name = "(?:$NameStrt)(?:$NameChar)*";
|
||
|
$QuoteSE = "\"[^\"]*\"|'[^']*'";
|
||
|
$DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*";
|
||
|
$MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
|
||
|
$S1 = "[\\n\\r\\t ]";
|
||
|
$UntilQMs = "[^?]*\\?+";
|
||
|
$PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
|
||
|
$DT_ItemSE =
|
||
|
"<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S";
|
||
|
$DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?";
|
||
|
$DeclCE =
|
||
|
"--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
|
||
|
$PI_CE = "$Name(?:$PI_Tail)?";
|
||
|
$EndTagCE = "$Name(?:$S)?>?";
|
||
|
$AttValSE = "\"[^<\"]*\"|'[^<']*'";
|
||
|
$ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?";
|
||
|
$MarkupSPE =
|
||
|
"<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)";
|
||
|
$XML_SPE = "$TextSE|$MarkupSPE";
|
||
|
|
||
|
|
||
|
sub ShallowParse {
|
||
|
my($XML_document) = @_;
|
||
|
return $XML_document =~ /$XML_SPE/g;
|
||
|
}
|
||
|
--- end REX code ---
|
||
|
|
||
|
The Perl and Ruby implementations of xmlformat contain parsers that
|
||
|
are based on the preceding code and are essentially the same, with the
|
||
|
exception of changes to variable and function names.
|
||
|
|