xmlreaderimpl/XMLv2__VWXMLReader.st
changeset 296 ea3dbc023c80
parent 173 738b801e1a58
child 300 b6d834208d33
equal deleted inserted replaced
295:c8f93ca6258c 296:ea3dbc023c80
       
     1 "{ Package: 'stx:goodies/xmlsuite/xmlreaderimpl' }"
       
     2 
       
     3 "{ NameSpace: XMLv2 }"
       
     4 
       
     "XMLReader subclass; the 'parser' instance variable holds the parser
      created by parseStream: and is queried for line and column numbers."
     XMLReader
         subclass: #VWXMLReader
         instanceVariableNames: 'parser'
         classVariableNames: ''
         poolDictionaries: ''
         category: 'XML Suite-SAX2-XMLReaders'
       
    10 !
       
    11 
       
    "Private stream wrapper that adds a 'col' instance variable; nextFor: and
     skip: keep it up to date together with the line counter."
    XML::StreamWrapper
        subclass: #StreamWrapper
        instanceVariableNames: 'col'
        classVariableNames: ''
        poolDictionaries: ''
        privateIn: VWXMLReader
       
    17 !
       
    18 
       
    "Private parser subclass; 'xmlReader' holds the reader set via xmlReader:,
     whose error handler receives the reports made by malformed:."
    XML::XMLParserStX
        subclass: #XMLParser
        instanceVariableNames: 'xmlReader'
        classVariableNames: ''
        poolDictionaries: ''
        privateIn: VWXMLReader
       
    24 !
       
    25 
       
    26 
       
    27 !VWXMLReader methodsFor:'SAX2 interface - Locator'!
       
    28 
       
    getColumnNumber
        "Answer the column number reported by the parser that parseStream:
         created. The request is simply forwarded to that parser."

        ^ parser getColumnNumber

        "Created: / 19-05-2005 / 10:45:03 / masca"
        "Modified: / 11-07-2005 / 12:07:19 / masca"
        "Modified: / 04-08-2005 / 11:21:01 / janfrog"
       
    37 !
       
    38 
       
    getLineNumber
        "Answer the line number reported by the parser that parseStream:
         created. The request is simply forwarded to that parser."

        ^ parser getLineNumber

        "Created: / 19-05-2005 / 10:45:03 / masca"
        "Modified: / 11-07-2005 / 12:07:10 / masca"
        "Modified: / 04-08-2005 / 11:21:15 / janfrog"
       
    47 ! !
       
    48 
       
    49 !VWXMLReader methodsFor:'SAX2 interface - extensions'!
       
    50 
       
    parseFragmentStream:aStream
        "Parse a fragment from aStream. No fragment-specific handling is done
         here: the stream is handed to parseStream: unchanged and that
         method's result is answered."

        ^ self parseStream:aStream

        "Created: / 03-05-2005 / 11:46:46 / janfrog"
       
    57 !
       
    58 
       
    parseStream:aStream
        "Parse the document read from aStream.

         An external stream is sent #text (presumably switching it to text
         mode - confirm); a binary internal stream is re-targeted onto the
         string form of its collection. A VWSAXBuilder driving my content
         handler is installed in a fresh private XMLParser, validation is
         switched off and the document is scanned. The parser is kept in
         'parser' so that line and column queries can be answered.
         Answers the receiver."

        | eventBuilder |

        aStream isExternalStream
            ifTrue:[ aStream text ]
            ifFalse:[
                aStream isBinary
                    ifTrue:[ aStream on:aStream collection asString ] ].

        eventBuilder := VWSAXBuilder
                            driver:self getContentHandler
                            source:aStream.
        parser := XMLv2::VWXMLReader::XMLParser on:aStream.
        parser
            xmlReader:self;
            builder:eventBuilder;
            validate:false;
            scanDocument.

        "Created: / 03-05-2005 / 11:46:46 / janfrog"
        "Modified: / 18-05-2005 / 11:04:37 / masca"
        "Modified: / 06-06-2006 / 09:23:21 / janfrog"
       
    82 ! !
       
    83 
       
    84 !VWXMLReader::StreamWrapper class methodsFor:'documentation'!
       
    85 
       
    version
        "Answer the version-control identification string of this class."

        ^ '$Id$'
       
    88 ! !
       
    89 
       
    90 !VWXMLReader::StreamWrapper methodsFor:'accessing'!
       
    91 
       
    col
        "Answer the column counter maintained by nextFor: and skip:
         (it is reset to 0 whenever a line feed is read)."

        ^ col

        "Created: / 04-08-2005 / 11:19:20 / janfrog"
       
    96 !
       
    97 
       
    line
        "Answer the line counter; it starts at 1 (see
         stream:protocol:name:entity:) and is advanced by nextFor: and skip:."

        ^ line

        "Created: / 04-08-2005 / 11:19:20 / janfrog"
       
   102 ! !
       
   103 
       
   104 !VWXMLReader::StreamWrapper methodsFor:'declaration'!
       
   105 
       
   encodingDeclIn: aParser
       "Read an optional encoding declaration at the current position.

        If the next character is not $e nothing is consumed and false is
        answered. Otherwise the keyword 'encoding', an equals sign and a
        quoted name are read (errors are reported on aParser), the name is
        validated by aParser, and - when the underlying stream understands
        #encoder and its current encoder has a different name - an encoder
        for the declared encoding is installed. Answers true in that case."

       | declared currentName mappedName |

       stream peek = $e ifFalse: [ ^ false ].

       self mustFind: 'encoding' errorOn: aParser.
       self skipSpaceIn: aParser.
       self mustFind: '=' errorOn: aParser.
       self skipSpaceIn: aParser.
       declared := self quotedString.
       aParser validateEncoding: declared.

       (stream respondsTo: #encoder) ifTrue: [
           currentName := stream encoder nameOfEncoding asLowercase.
           mappedName := XMLv2::VWXMLReader::XMLParser mapEncoding: declared.
           currentName ~= mappedName asLowercase ifTrue: [
               "switch decoding to what the document declares"
               stream encoder: (CharacterEncoder encoderFor: mappedName asSymbol) ] ].
       ^ true

       "Created: / 15-06-2005 / 15:00:01 / janfrog"
       "Modified: / 22-06-2005 / 17:02:27 / masca"
       
   130 ! !
       
   131 
       
   132 !VWXMLReader::StreamWrapper methodsFor:'initialization'!
       
   133 
       
   stream: str protocol: key name: value entity: ent
       "Initialize the wrapper through the superclass, start position
        tracking at line 1 / column 0, and wrap the stream set up by the
        superclass in an EncodedStream that uses the UTF8 encoder
        (encodingDeclIn: may replace that encoder later)."

       super
           stream: str
           protocol: key
           name: value
           entity: ent.

       line := 1.
       col := 0.
       stream := EncodedStream stream: stream encoder: CharacterEncoder encoderForUTF8

       "Created: / 15-06-2005 / 14:58:51 / janfrog"
       "Modified: / 04-08-2005 / 11:42:05 / janfrog"
       
   146 ! !
       
   147 
       
   148 !VWXMLReader::StreamWrapper methodsFor:'streaming'!
       
   149 
       
   nextFor:aParser
       "Answer the next character of the wrapped stream, or nil at the end
        of input, keeping the line and column counters up to date.

        For non-internal sources a CR, a CR LF pair or a single LF is
        answered as aParser eol. Characters outside 16r20..16rFF are checked
        with aParser isIllegalCharacter: and reported via notPermitted:.

        Fixes over the previous version:
         - the counters are no longer touched when the end of input is hit
           (col used to be incremented before the nil check);
         - a CR line end (with or without a following LF) now advances the
           line counter. The LF of a CR LF pair is swallowed by peekFor: and
           therefore never reached the line-feed counting, so such input
           reported wrong line numbers."

       |ch code|

       ch := stream next.
       ch isNil ifTrue:[ ^ nil ].

       ch = Character lf
           ifTrue:[
               line := line + 1.
               col := 0 ]
           ifFalse:[
               col := col + 1 ].
       code := ch codePoint.

       "to speed up things, do a quick check for common characters"
       code >= 16r20 ifTrue:[
           code <= 16rFF ifTrue:[ ^ ch ].
       ] ifFalse:[
           isInternal ifFalse:[
               ch == cr ifTrue:[
                   "CR or CR LF: one line end; count it here because the
                    LF (if any) is consumed without passing the counter"
                   stream peekFor:lf.
                   line := line + 1.
                   col := 0.
                   ^ aParser eol
               ].
               ch == lf ifTrue:[ ^ aParser eol ]
           ].
       ].
       (aParser isIllegalCharacter:code) ifTrue:[
           aParser
               notPermitted:'a character with Unicode value ' , code printString
       ].
       ^ ch

       "Created: / 04-08-2005 / 11:16:48 / janfrog"
       
   192 !
       
   193 
       
   skip: n
       "Skip n characters of the wrapped stream.

        For a negative n the request is forwarded to the stream and its
        result answered; the line and column counters are NOT adjusted in
        that case. For n >= 0 the characters are read and the counters
        updated: without a line feed the column advances by the number of
        characters read; otherwise the line advances by the number of line
        feeds and the column becomes the number of characters that follow
        the last line feed.

        Fix: the column used to be ADDED to the previous column after a
        line feed had been skipped, instead of restarting from the last
        line feed."

       | skipped lfCount lastLf |

       n < 0 ifTrue:[ ^ stream skip:n ].

       skipped := stream next:n.
       lfCount := skipped occurrencesOf:Character lf.
       lfCount isZero
           ifTrue:[
               col := col + skipped size ]
           ifFalse:[
               lastLf := skipped lastIndexOf:Character lf.
               line := line + lfCount.
               col := skipped size - lastLf ].

       "Created: / 04-08-2005 / 11:18:37 / janfrog"
       "Modified: / 04-08-2005 / 14:54:25 / janfrog"
       
   211 ! !
       
   212 
       
   213 !VWXMLReader::XMLParser class methodsFor:'documentation'!
       
   214 
       
   version
       "Answer the version-control identification string of this class."

       ^ '$Id$'
       
   217 ! !
       
   218 
       
   219 !VWXMLReader::XMLParser class methodsFor:'utilities'!
       
   220 
       
   mapEncoding: anEncoding
       "Answer the encoder name to use for the encoding name found in a
        document: currently simply the name in lowercase.
        (An earlier mapping of utf-8, utf-16 and iso-8859-1 onto UTF_8,
        UTF_16 and ISO8859_1 is disabled.)"

       ^ anEncoding asLowercase

       "Created: / 22-06-2005 / 17:11:03 / masca"
       
   231 !
       
   232 
       
   readFileContents: fn
       "Answer the contents of the file fn as seen through a StreamWrapper.

        The file is opened with UTF_8 encoding and wrapped with protocol
        'file'; a new parser with LF line ends is used for reading. The
        wrapper checks the encoding before the contents are fetched and is
        closed in any case."

       | wrapper reader |

       wrapper := VWXMLReader::StreamWrapper
                       stream: (fn withEncoding: #UTF_8) readStream
                       protocol: 'file'
                       name: nil
                       entity: nil.
       reader := self new.
       reader lineEndLF.
       ^ [
           wrapper checkEncoding.
           wrapper contentsFor: reader
         ] ensure: [ wrapper close ]

       "Created: / 15-06-2005 / 15:04:40 / janfrog"
       
   248 ! !
       
   249 
       
   250 !VWXMLReader::XMLParser methodsFor:'SAX2 interface - Locator'!
       
   251 
       
   getColumnNumber
       "Answer the column counter of the source wrapper that is currently
        being read (currentSource)."

       ^ currentSource col

       "Modified: / 11-07-2005 / 12:07:19 / masca"
       "Created: / 04-08-2005 / 11:20:26 / janfrog"
       
   259 !
       
   260 
       
   getLineNumber
       "Answer the line counter of the source wrapper that is currently
        being read (currentSource)."

       ^ currentSource line

       "Modified: / 11-07-2005 / 12:07:10 / masca"
       "Created: / 04-08-2005 / 11:20:26 / janfrog"
       
   268 ! !
       
   269 
       
   270 !VWXMLReader::XMLParser methodsFor:'accessing'!
       
   271 
       
   dtdFile:aString
       "Does nothing: the argument is ignored and the receiver is answered."

       ^ self

       "Created: / 18-05-2005 / 11:04:10 / masca"
       
   275 !
       
   276 
       
   xmlReader
       "Answer the reader stored with xmlReader: (malformed: reports errors
        to that reader's error handler)."

       ^ xmlReader

       "Created: / 25-10-2005 / 13:09:54 / janfrog"
       
   281 !
       
   282 
       
   xmlReader:something
       "Remember something as the reader whose error handler is used by
        malformed:. Answers the receiver."

       xmlReader := something.

       "Created: / 25-10-2005 / 13:09:54 / janfrog"
       
   287 ! !
       
   288 
       
   289 !VWXMLReader::XMLParser methodsFor:'attribute processing'!
       
   290 
       
   processAttributes
       "Read the attributes at the current position.

        Leading white space is skipped before each attribute; reading
        continues while the current character can start a name. Answers an
        OrderedCollection with the attributes in document order, or nil
        when there is no attribute at all.

        NOTE(review): attributes with the same name are NOT rejected here;
        both earlier variants of that check had been disabled. The Set that
        was still allocated for the check on every call (but never read)
        and the intermediate temporary have been removed."

       |attributes|

       attributes := nil.
       [
           self skipSpace.
           self isValidNameStart:hereChar
       ] whileTrue:[
           "create the collection lazily so that attribute-less tags answer nil"
           attributes isNil ifTrue:[
               attributes := OrderedCollection new ].
           attributes add:self attribute.
       ].
       ^ attributes

       "Created: / 02-06-2008 / 19:28:48 / Jan Vrany <vranyj1@fel.cvut.cz>"
       
   317 ! !
       
   318 
       
   319 !VWXMLReader::XMLParser methodsFor:'element processing'!
       
   320 
       
   elementContent: tag openedIn: str
       "Read the content of the element named tag up to and including its
        end tag and answer what closeTag:return: answers for the collected
        children.

        Character data is gathered in a buffer and turned into a text node
        (builder makeText:) whenever markup starts. Markup is dispatched on
        its prefix: end tag, processing instruction, CDATA section, comment
        or nested element. An ampersand starts either a character reference
        or a general entity reference.

        str is the source in which the start tag was opened; after the end
        tag it must be identical to lastSource, otherwise improper nesting
        within entities is reported. When validating, the tag and its
        content are checked against the DTD before returning."

       | textBuffer children startSource closedContent elementDecl node |

       textBuffer := (UnicodeString new: 32) writeStream.
       children := OrderedCollection new.
       [
           hereChar isNil
               ifTrue: [self expected: ('end tag for %<<1s>>' expandMacrosWith: tag)].
           hereChar = $<
               ifTrue: [
                   "markup starts: flush the text collected so far"
                   textBuffer position0Based = 0 ifFalse: [
                       self with: children add: (builder makeText: textBuffer contents).
                       textBuffer := (UnicodeString new: 32) writeStream].
                   startSource := currentSource.
                   (self skipIf: '</') ifTrue: [
                       "end tag: finish the element and leave the loop"
                       closedContent := self closeTag: tag return: children asArray.
                       str == lastSource
                           ifFalse: [self expected: 'elements properly nested within entities'].
                       self isValidating ifTrue: [
                           elementDecl := self dtd elementFor: tag.
                           elementDecl isNil
                               ifTrue: [self invalid: ('Using a tag (<1s>) without declaring it is not valid' expandMacrosWith: tag)]
                               ifFalse: [elementDecl validateTag: tag content: closedContent for: self]].
                       ^ closedContent].
                   node := (self skipIf: '<?')
                       ifTrue: [self completePI: startSource]
                       ifFalse: [
                           (self skipIf: '<!![CDATA[')
                               ifTrue: [self completeCDATA: startSource]
                               ifFalse: [
                                   (self skipIf: '<!!--')
                                       ifTrue: [self completeComment: startSource]
                                       ifFalse: [self element]]].
                   self with: children add: node]
               ifFalse: [
                   hereChar = $&
                       ifTrue: [
                           startSource := currentSource.
                           (self skipIf: '&#')
                               ifTrue: [self charEntity: textBuffer startedIn: startSource]
                               ifFalse: [
                                   self getNextChar.
                                   self generalEntityInText: textBuffer canBeExternal: true]]
                       ifFalse: [
                           "plain character data"
                           textBuffer nextPut: hereChar.
                           self getNextChar]]
       ] repeat

       "Created: / 22-06-2005 / 09:23:51 / masca"
       "Modified: / 15-05-2006 / 14:50:28 / janfrog"
       
   369 !
       
   370 
       
   generalEntityInText: str canBeExternal: external
       "Check the syntax of a general entity reference whose name starts at
        the current position: read the name, require a semicolon as current
        character and require that the current source is still the one that
        was lastSource on entry (proper nesting of entity references).

        The reference is NOT expanded: nothing is written to str, the DTD is
        not consulted, the external argument is not used and the semicolon
        is not consumed. Answers the receiver.

        The former lookup and expansion code followed an unconditional
        return and could never run; it has been removed together with its
        unused temporaries. Behaviour is unchanged.

        NOTE(review): because the semicolon stays the current character,
        elementContent:openedIn: appends it to the text it collects, so a
        named reference in element content ends up as a lone semicolon.
        Confirm whether this is intended before relying on it."

       | sourceOnEntry |

       sourceOnEntry := lastSource.
       self getSimpleName.
       hereChar = $;
           ifFalse: [self expected: 'semicolon'].
       currentSource = sourceOnEntry
           ifFalse: [self expected: 'proper nesting of entity references within other entity references'].
       ^ self

       "Created: / 07-11-2006 / 21:09:05 / janfrog"
       
   403 ! !
       
   404 
       
   405 !VWXMLReader::XMLParser methodsFor:'initialization'!
       
   406 
       
   wrapStream:aStream protocol:protocolString name:name
       "Answer a private StreamWrapper (the line and column tracking one)
        on aStream. When protocolString is nil the protocol is 'file' for
        external streams and 'internal' for all others. No entity is
        attached."

       |effectiveProtocol|

       effectiveProtocol := protocolString notNil
           ifTrue:[ protocolString ]
           ifFalse:[
               aStream isExternalStream
                   ifTrue:[ 'file' ]
                   ifFalse:[ 'internal' ] ].

       ^ VWXMLReader::StreamWrapper
               stream:aStream
               protocol:effectiveProtocol
               name:name
               entity:nil

       "Created: / 15-06-2005 / 15:05:38 / janfrog"
       
   424 ! !
       
   425 
       
   426 !VWXMLReader::XMLParser methodsFor:'private'!
       
   427 
       
   malformed: aString
       "Report aString as a fatal error to the error handler of my reader."

       | handler |

       handler := self xmlReader getErrorHandler.
       handler fatalError: aString

       "Created: / 25-10-2005 / 13:09:07 / janfrog"
       
   433 !
       
   434 
       
   warn: aString
       "Report a parser warning. The text is prefixed with
        'XML parser warning: ' and passed to malformed:.
        NOTE(review): malformed: reports to the error handler as a fatal
        error, so warnings are not distinguished from fatal errors here."

       | message |

       message := 'XML parser warning: ' , aString.
       self malformed: message

       "Created: / 22-06-2005 / 10:12:49 / masca"
       
   440 ! !
       
   441 
       
   442 !VWXMLReader class methodsFor:'documentation'!
       
   443 
       
   version
       "Answer the CVS header string identifying this source file."

       ^ '$Header: /opt/data/cvs/stx/goodies/xmlsuite/xmlreaderimpl/XMLv2__VWXMLReader.st,v 1.4 2006-11-08 09:29:50 vranyj1 Exp $'
       
   446 !
       
   447 
       
   version_SVN
       "Answer the SVN identification string of this source file."

       ^ '$Id$'
       
   450 ! !