summaryrefslogtreecommitdiffstats
path: root/kate/xmltools/pseudo_dtd.cpp
blob: b724f7d149be3252ad5ec759e730fabe4895c1c6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
/***************************************************************************
	pseudoDtd.cpp
	copyright			: (C) 2001-2002 by Daniel Naber
	email				: daniel.naber@t-online.de
 ***************************************************************************/

/***************************************************************************
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or ( at your option ) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 ***************************************************************************/

#include "pseudo_dtd.h"

#include <assert.h>

#include <tqdom.h>
#include <tqregexp.h>

#include <tdelocale.h>
#include <tdemessagebox.h>

/**
 * Default constructor: switches on "SGML support".
 *
 * "SGML support" only means case-insensitivity, because HTML is
 * case-insensitive up to version 4.
 */
PseudoDTD::PseudoDTD()
  : m_sgmlSupport( true )   // TODO: make this a run-time option ( maybe automatically set )
{
}

/**
 * Destructor. The body is empty; no explicit cleanup is performed here.
 */
PseudoDTD::~PseudoDTD() {}

/**
 * Parse a meta DTD (the XML produced by dtdparse) and fill the lookup tables
 * of this object from it.
 *
 * An error box is shown and nothing is parsed if the text is not well-formed
 * XML or if its doctype name is not "dtd". While parsing, a modal progress
 * dialog is shown; if one of the parse passes reports cancellation the
 * remaining passes are skipped.
 *
 * @param metaDtdUrl  only used inside the error messages
 * @param metaDtd     the XML text of the meta DTD
 */
void PseudoDTD::analyzeDTD( TQString &metaDtdUrl, TQString &metaDtd )
{
  TQDomDocument doc( "dtdIn_xml" );

  const bool wellFormed = doc.setContent( metaDtd);
  if ( !wellFormed )
  {
    KMessageBox::error(0, i18n("The file '%1' could not be parsed. "
        "Please check that the file is well-formed XML.").arg( metaDtdUrl ),
        i18n( "XML Plugin Error") );
    return;
  }

  const bool isMetaDtd = ( doc.doctype().name() == "dtd" );
  if ( !isMetaDtd )
  {
    KMessageBox::error(0, i18n("The file '%1' is not in the expected format. "
        "Please check that the file is of this type:\n"
            "-//Norman Walsh//DTD DTDParse V2.0//EN\n"
            "You can produce such files with dtdparse. "
            "See the Kate Plugin documentation for more information.").arg( metaDtdUrl ),
        i18n("XML Plugin Error") );
    return;
  }

  // One progress step per node visited by the passes below. <attlist> nodes
  // are counted twice because two passes iterate over them
  // ( TODO: optimize that? ).
  const uint entityCount  = doc.elementsByTagName( "entity" ).count();
  const uint elementCount = doc.elementsByTagName( "element" ).count();
  const uint attlistCount = doc.elementsByTagName( "attlist" ).count();
  const uint totalSteps   = entityCount + elementCount + attlistCount * 2;

  TQProgressDialog progress( i18n("Analyzing meta DTD..."), i18n("Cancel"), totalSteps,
                            0, "progress", TRUE );
  progress.setMinimumDuration( 400 );
  progress.setProgress(0);

  // Get information from meta DTD and put it in TQt data structures for fast
  // access. The passes run in this order and stop at the first one that
  // returns false.
  const bool finished = parseEntities( &doc, &progress )
                     && parseElements( &doc, &progress )
                     && parseAttributes( &doc, &progress )
                     && parseAttributeValues( &doc, &progress );
  if ( !finished )
    return;

  progress.setProgress( totalSteps );	// just to make sure the dialog disappears
}

// ========================================================================
// DOM stuff:

/**
 * Iterate through the XML to get a mapping which sub-elements are allowed for
 * all elements.
 *
 * For every <element> node the names found below its first
 * <content-model-expanded> descendant are collected, the names listed below
 * its first <exclusions> descendant are removed again, and the result is
 * stored in m_elementsList under the element's "name" attribute. If a content
 * model was found and the element also contains an <empty> node, the marker
 * entry "__EMPTY" is added to the list.
 *
 * @param doc       the parsed meta DTD document
 * @param progress  progress dialog; advanced by one step per <element> node
 *                  and polled for cancellation
 * @return false if the progress dialog reported cancellation, true otherwise
 */
bool PseudoDTD::parseElements( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_elementsList.clear();
  // We only display a list, i.e. we pretend that the content model is just
  // a set, so we use a map. This is necessary e.g. for xhtml 1.0's head element,
  // which would otherwise display some elements twice.
  TQMap<TQString,bool> subelementList;	// the bool is not used

  TQDomNodeList list = doc->elementsByTagName( "element" );
  uint listLength = list.count();      // speedup (really! )

  for( uint i = 0; i < listLength; i++ )
  {
    if( progress->wasCancelled() )
      return false;

    progress->setProgress( progress->progress()+1 );
    // FIXME!:
    //tqApp->processEvents();

    subelementList.clear();
    TQDomNode node = list.item( i );
    TQDomElement elem = node.toElement();
    if( elem.isNull() )
      continue;

    // Enter the expanded content model, which may also include stuff not allowed.
    // We do not care if it's a <sequence-group> or whatever.
    TQDomNodeList contentModelList = elem.elementsByTagName( "content-model-expanded" );
    TQDomElement contentModelElem = contentModelList.item(0).toElement();
    if( ! contentModelElem.isNull() )
    {
      // collect the allowed sub elements:
      TQDomNodeList subList = contentModelElem.elementsByTagName( "element-name" );
      uint subListLength = subList.count();
      for( uint l = 0; l < subListLength; l++ )
      {
        TQDomElement subElem = subList.item(l).toElement();
        if( !subElem.isNull() )
          subelementList[subElem.attribute( "name" )] = true;
      }

      // anders: check if this is an EMPTY element, and put "__EMPTY" in the
      // sub list, so that we can insert tags in empty form if required.
      TQDomNodeList emptyList = elem.elementsByTagName( "empty" );
      if ( emptyList.count() )
        subelementList["__EMPTY"] = true;
    }

    // Now remove the elements not allowed (e.g. <a> is explicitly not allowed in <a>
    // in the HTML 4.01 Strict DTD):
    TQDomNodeList exclusionsList = elem.elementsByTagName( "exclusions" );
    if( exclusionsList.length() > 0 )
    {	// sometimes there are no exclusions ( e.g. in XML DTDs there are never exclusions )
      TQDomElement exclusionsElem = exclusionsList.item(0).toElement();
      if( ! exclusionsElem.isNull() )
      {
        TQDomNodeList subList = exclusionsElem.elementsByTagName( "element-name" );
        uint subListLength = subList.count();
        for( uint l = 0; l < subListLength; l++ )
        {
          TQDomElement subElem = subList.item(l).toElement();
          if( subElem.isNull() )
            continue;

          TQMap<TQString,bool>::Iterator it = subelementList.find( subElem.attribute( "name" ) );
          if( it != subelementList.end() )
            subelementList.remove(it);
        }
      }
    }

    // turn the map into a list:
    TQStringList subelementListTmp;
    TQMap<TQString,bool>::Iterator it;
    for( it = subelementList.begin(); it != subelementList.end(); ++it )
      subelementListTmp.append( it.key() );

    m_elementsList.insert( elem.attribute( "name" ), subelementListTmp );

  } // end iteration over all <element> nodes
  return true;
}

/**
 * Check which elements are allowed inside a parent element. This returns
 * a list of allowed elements, but it doesn't care about order or if only a certain
 * number of occurrences is allowed.
 *
 * With SGML support enabled the parent element name is compared
 * case-insensitively and the first matching entry wins; otherwise an exact
 * lookup is done.
 *
 * @param parentElement  name of the enclosing element
 * @return the allowed sub-element names, or an empty list if the parent
 *         element is unknown
 */
TQStringList PseudoDTD::allowedElements( TQString parentElement )
{
  // Read-only alias: the lookups below never modify the map.
  const TQMap<TQString,TQStringList> &elements = m_elementsList;

  if( m_sgmlSupport )
  {
    // find the matching element, ignoring case; the lower-cased search name
    // does not change during the scan, so compute it only once:
    const TQString wanted = parentElement.lower();
    TQMap<TQString,TQStringList>::ConstIterator it;
    for( it = elements.begin(); it != elements.end(); ++it )
    {
      if( it.key().lower() == wanted )
        return it.data();
    }
    return TQStringList();
  }

  // exact match: a single find() instead of contains() followed by operator[]
  TQMap<TQString,TQStringList>::ConstIterator hit = elements.find( parentElement );
  if( hit != elements.end() )
    return hit.data();

  return TQStringList();
}

/**
 * Walk over all <attlist> nodes of the meta DTD and record, per element,
 * which attributes exist.
 *
 * Attributes whose "type" is "#REQUIRED" go into the required list, all
 * others into the optional list. The result is stored in m_attributesList
 * under the "name" attribute of the <attlist> node.
 *
 * @param doc       the parsed meta DTD document
 * @param progress  progress dialog; advanced by one step per <attlist> node
 *                  and polled for cancellation
 * @return false if the progress dialog reported cancellation, true otherwise
 */
bool PseudoDTD::parseAttributes( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_attributesList.clear();

  TQDomNodeList attlistNodes = doc->elementsByTagName( "attlist" );
  const uint attlistCount = attlistNodes.count();

  for( uint idx = 0; idx < attlistCount; ++idx )
  {
    if( progress->wasCancelled() )
      return false;

    progress->setProgress( progress->progress()+1 );
    // FIXME!!
    //tqApp->processEvents();

    TQDomElement attlist = attlistNodes.item( idx ).toElement();
    if( attlist.isNull() )
      continue;

    ElementAttributes collected;
    TQDomNodeList attributeNodes = attlist.elementsByTagName( "attribute" );
    const uint attributeCount = attributeNodes.count();
    for( uint a = 0; a < attributeCount; ++a )
    {
      TQDomElement attribute = attributeNodes.item( a ).toElement();
      if( attribute.isNull() )
        continue;

      const bool required = ( attribute.attribute("type") == "#REQUIRED" );
      const TQString attributeName = attribute.attribute("name");
      if( required )
        collected.requiredAttributes.append( attributeName );
      else
        collected.optionalAttributes.append( attributeName );
    }

    m_attributesList.insert( attlist.attribute("name"), collected );
  }

  return true;
}

/**
 * Check which attributes are allowed for an element.
 *
 * With SGML support enabled the element name is compared case-insensitively
 * and the first matching entry wins; otherwise an exact lookup is done.
 *
 * @param element  name of the element
 * @return the optional attributes followed by the required attributes of the
 *         element, or an empty list if the element is unknown
 */
TQStringList PseudoDTD::allowedAttributes( TQString element )
{
  // Read-only alias: the lookups below never modify the map.
  const TQMap<TQString,ElementAttributes> &attributes = m_attributesList;

  if( m_sgmlSupport )
  {
    // find the matching element, ignoring case; the lower-cased search name
    // does not change during the scan, so compute it only once:
    const TQString wanted = element.lower();
    TQMap<TQString,ElementAttributes>::ConstIterator it;
    for( it = attributes.begin(); it != attributes.end(); ++it ) {
      if( it.key().lower() == wanted ) {
        return it.data().optionalAttributes + it.data().requiredAttributes;
      }
    }
    return TQStringList();
  }

  // exact match: a single find() instead of contains() plus two operator[] calls
  TQMap<TQString,ElementAttributes>::ConstIterator hit = attributes.find( element );
  if( hit != attributes.end() )
    return hit.data().optionalAttributes + hit.data().requiredAttributes;

  return TQStringList();
}

/**
 * Get the attributes that were marked "#REQUIRED" for an element.
 *
 * With SGML support enabled the element name is compared case-insensitively
 * and the first matching entry wins; otherwise an exact lookup is done.
 *
 * @param element  name of the element
 * @return the required attribute names, or an empty list if the element is
 *         unknown
 */
TQStringList PseudoDTD::requiredAttributes( const TQString &element ) const
{
  if ( m_sgmlSupport )
  {
    // the lower-cased search name does not change during the scan, so
    // compute it only once:
    const TQString wanted = element.lower();
    TQMap<TQString,ElementAttributes>::ConstIterator it;
    for( it = m_attributesList.begin(); it != m_attributesList.end(); ++it )
    {
      if( it.key().lower() == wanted )
        return it.data().requiredAttributes;
    }
    return TQStringList();
  }

  // exact match: a single find() instead of contains() followed by operator[]
  TQMap<TQString,ElementAttributes>::ConstIterator hit = m_attributesList.find( element );
  if( hit != m_attributesList.end() )
    return hit.data().requiredAttributes;

  return TQStringList();
}

/**
 * Walk over all <attlist> nodes of the meta DTD and record, per element and
 * attribute, the predefined attribute values.
 *
 * The "value" attribute of every <attribute> node is split at single spaces;
 * the resulting list is stored per attribute name, and the per-attribute map
 * is stored in m_attributevaluesList under the "name" attribute of the
 * <attlist> node.
 *
 * @param doc       the parsed meta DTD document
 * @param progress  progress dialog; advanced by one step per <attlist> node
 *                  and polled for cancellation
 * @return false if the progress dialog reported cancellation, true otherwise
 */
bool PseudoDTD::parseAttributeValues( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_attributevaluesList.clear();            // 1 element : n possible attributes

  TQDomNodeList attlistNodes = doc->elementsByTagName( "attlist" );
  const uint attlistCount = attlistNodes.count();

  for( uint idx = 0; idx < attlistCount; ++idx )
  {
    if( progress->wasCancelled() )
      return false;

    progress->setProgress( progress->progress()+1 );
    // FIXME!
    //tqApp->processEvents();

    TQDomElement attlist = attlistNodes.item( idx ).toElement();
    if( attlist.isNull() )
      continue;

    // 1 attribute : n possible values
    TQMap<TQString,TQStringList> valuesByAttribute;

    // Enter the list of <attribute>:
    TQDomNodeList attributeNodes = attlist.elementsByTagName( "attribute" );
    const uint attributeCount = attributeNodes.count();
    for( uint a = 0; a < attributeCount; ++a )
    {
      TQDomElement attribute = attributeNodes.item( a ).toElement();
      if( attribute.isNull() )
        continue;

      const TQString rawValues = attribute.attribute( "value" );
      const TQStringList values = TQStringList::split( TQRegExp(" "), rawValues );
      valuesByAttribute.insert( attribute.attribute("name"), values );
    }

    m_attributevaluesList.insert( attlist.attribute("name"), valuesByAttribute );
  }
  return true;
}

/**
 * Check which attribute values are allowed for an attribute in an element
 * (the element is necessary because e.g. "href" inside <a> could be different
 * to an "href" inside <link>).
 *
 * With SGML support enabled both names are compared case-insensitively. If an
 * element matches but does not have the attribute, the scan continues with
 * the remaining elements. Without SGML support exact lookups are done.
 *
 * @param element    name of the element
 * @param attribute  name of the attribute inside that element
 * @return the predefined values, or an empty list if none are available
 */
TQStringList PseudoDTD::attributeValues( TQString element, TQString attribute )
{
  typedef TQMap<TQString,TQStringList> ValueMap;
  typedef TQMap<TQString,ValueMap> ElementMap;

  // Only const references and const iterators are used below, so that the
  // implicitly shared maps are never detached (deep-copied) just to be read.
  const ElementMap &elements = m_attributevaluesList;

  // Direct access would be faster than iteration of course but not always correct,
  // because we need to be case-insensitive.
  if( m_sgmlSupport ) {
    // the lower-cased search names do not change during the scan:
    const TQString wantedElement = element.lower();
    const TQString wantedAttribute = attribute.lower();

    // first find the matching element, ignoring case:
    ElementMap::ConstIterator it;
    for( it = elements.begin(); it != elements.end(); ++it )
    {
      if( it.key().lower() != wantedElement )
        continue;

      // then find the matching attribute for that element, ignoring case:
      const ValueMap &attrVals = it.data();
      ValueMap::ConstIterator itV;
      for( itV = attrVals.begin(); itV != attrVals.end(); ++itV )
      {
        if( itV.key().lower() == wantedAttribute )
          return itV.data();
      }
    }
    // no predefined values available:
    return TQStringList();
  }

  ElementMap::ConstIterator hit = elements.find( element );
  if( hit != elements.end() )
  {
    const ValueMap &attrVals = hit.data();
    ValueMap::ConstIterator valueHit = attrVals.find( attribute );
    if( valueHit != attrVals.end() )
      return valueHit.data();
  }

  // no predefined values available:
  return TQStringList();
}

/**
 * Walk over all <entity> nodes of the meta DTD and build a mapping from
 * entity name to expanded text, e.g. nbsp => &#160;. Entities whose "type"
 * attribute is "param" (parameter entities) are ignored.
 *
 * The expansion is the text of the first <text-expanded> descendant; if there
 * is none, a null string is stored for the entity.
 *
 * @param doc       the parsed meta DTD document
 * @param progress  progress dialog; advanced by one step per <entity> node
 *                  and polled for cancellation
 * @return false if the progress dialog reported cancellation, true otherwise
 */
bool PseudoDTD::parseEntities( TQDomDocument *doc, TQProgressDialog *progress )
{
  m_entityList.clear();

  TQDomNodeList entityNodes = doc->elementsByTagName( "entity" );
  const uint entityCount = entityNodes.count();

  for( uint idx = 0; idx < entityCount; ++idx )
  {
    if( progress->wasCancelled() )
      return false;

    progress->setProgress( progress->progress()+1 );
    //FIXME!!
    //tqApp->processEvents();

    TQDomElement entity = entityNodes.item( idx ).toElement();
    // TODO: what's cdata <-> gen ?
    if( entity.isNull() || entity.attribute( "type" ) == "param" )
      continue;

    TQDomElement expandedElem = entity.elementsByTagName( "text-expanded" ).item( 0 ).toElement();

    // Stays a null string when there is no <text-expanded> element.
    TQString expansion;
    if( ! expandedElem.isNull() )
    {
      expansion = expandedElem.text();
      // TODO: support more than one &#...; in the expanded text
      /* TODO include do this when the unicode font problem is solved:
      if( exp.contains(TQRegExp("^&#x[a-zA-Z0-9]+;$")) ) {
        // hexadecimal numbers, e.g. "&#x236;"
        uint end = exp.find( ";" );
        exp = exp.mid( 3, end-3 );
        exp = TQChar();
      } else if( exp.contains(TQRegExp("^&#[0-9]+;$")) ) {
        // decimal numbers, e.g. "&#236;"
        uint end = exp.find( ";" );
        exp = exp.mid( 2, end-2 );
        exp = TQChar( exp.toInt() );
      }
      */
    }

    m_entityList.insert( entity.attribute("name"), expansion );
  }
  return true;
}

/**
 * Get a list of all ( non-parameter ) entities that start with a certain string.
 *
 * @param start  prefix the entity name has to start with
 * @return the names of all matching entities stored in m_entityList
 */
TQStringList PseudoDTD::entities( TQString start )
{
  TQStringList matches;
  TQMap<TQString,TQString>::ConstIterator it;
  for( it = m_entityList.begin(); it != m_entityList.end(); ++it ) {
    // Compare against the entity *name* (the map key). Dereferencing the
    // iterator would yield the expanded text instead, which is not what
    // the prefix is meant to filter on.
    if( it.key().startsWith(start) )
    {
      TQString str = it.key();
      /* TODO: show entities as unicode character
      if( !it.data().isEmpty() ) {
        //str += " -- " + it.data();
        TQRegExp re( "&#(\\d+);" );
        if( re.search(it.data()) != -1 ) {
          uint ch = re.cap( 1).toUInt();
          str += " -- " + TQChar( ch).decomposition();
        }
        //kdDebug() << "#" << it.data() << endl;
      }
      */
      matches.append( str );
      // TODO: later use a table view
    }
  }
  return matches;
}