// Logo Search packages:
// Sourcecode: kdeaddons version File versions
//
// pseudo_dtd.cpp

/***************************************************************************
      pseudo_dtd.cpp
      copyright               : (C) 2001-2002 by Daniel Naber
      email                   : daniel.naber@t-online.de
 ***************************************************************************/

/***************************************************************************
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 ***************************************************************************/

#include "pseudo_dtd.h"

#include <assert.h>

#include <qdom.h>
#include <qregexp.h>

#include <klocale.h>
#include <kmessagebox.h>

/** Create an empty pseudo DTD.
  *
  * "SGML support" only means case-insensitivity, because HTML is
  * case-insensitive up to version 4. It is always switched on for now.
  * TODO: make this a run-time option (maybe automatically set)
  */
PseudoDTD::PseudoDTD()
      : m_sgml_support(true)
{
      //kdDebug() << "PseudoDTD::PseudoDTD()" << endl;
}

/** Nothing to release explicitly here; the body is intentionally empty. */
PseudoDTD::~PseudoDTD()
{
      //kdDebug() << "PseudoDTD::~PseudoDTD()" << endl;
}

/** Parse a meta DTD (a DTD described in XML) and fill the lookup tables for
  * entities, elements, attributes and attribute values from it.
  *
  * If the text cannot be parsed as XML, or its doctype name is not "dtd", an
  * error box is shown and the function returns before touching any table.
  * A progress dialog is shown while the tables are built; as soon as one of
  * the extraction steps returns false the remaining steps are skipped.
  *
  * @param meta_dtd_url origin of the meta DTD, only used in the error messages
  * @param meta_dtd     the content of the meta DTD
  */
void PseudoDTD::analyzeDTD(QString &meta_dtd_url, QString &meta_dtd)
{
      QDomDocument doc("dtd_in_xml");

      const bool well_formed = doc.setContent(meta_dtd);
      if ( !well_formed ) {
            KMessageBox::error(0, i18n("The file '%1' could not be parsed. "
                  "Please check that the file is well-formed XML.").arg(meta_dtd_url),
                  i18n("XML Plugin Error"));
            return;
      }

      const QString doctype_name = doc.doctype().name();
      if ( doctype_name != "dtd" ) {
            KMessageBox::error(0, i18n("The file '%1' is not in the expected format. "
                  "Please check that the file is of this type:\n"
                  "-//Norman Walsh//DTD DTDParse V2.0//EN\n"
                  "You can produce such files with dtdparse. "
                  "See the Kate Plugin documentation for more information.").arg(meta_dtd_url),
                  i18n("XML Plugin Error"));
            return;
      }

      // One progress step per node visited by the extraction steps below.
      const uint entity_steps = doc.elementsByTagName("entity").count();
      const uint element_steps = doc.elementsByTagName("element").count();
      // <attlist> nodes count twice, as they are iterated twice (TODO: optimize that?):
      const uint attlist_steps = doc.elementsByTagName("attlist").count() * 2;
      const uint total_steps = entity_steps + element_steps + attlist_steps;

      QProgressDialog progress( i18n("Analyzing meta DTD..."), i18n("Cancel"), total_steps,
                                                0, "progress", TRUE );
      progress.setMinimumDuration(400);
      progress.setProgress(0);

      // Get information from meta DTD and put it in Qt data structures for fast
      // access. The steps run in this order and stop at the first one that fails:
      const bool completed = getEntities(&doc, &progress)
            && getAllowedElements(&doc, &progress)
            && getAllowedAttributes(&doc, &progress)
            && getAllowedAttributeValues(&doc, &progress);
      if( ! completed ) {
            return;
      }

      // just to make sure the dialog disappears
      progress.setProgress(total_steps);
}

// ========================================================================
// DOM stuff:

/** Iterate through the XML to build the mapping from each element to the
  * sub-elements allowed inside it (stored in m_elements_list).
  *
  * For every <element> node the names of all <element-name> nodes below its
  * <content-model-expanded> are collected, "__EMPTY" is added if the element
  * contains an <empty> node, and every name listed below <exclusions> is
  * removed again. Order and number of occurrences are not recorded.
  *
  * @param doc      the parsed meta DTD
  * @param progress progress dialog; advanced by one step per <element> node
  * @return false if the progress dialog reports cancellation, true otherwise
  */
bool PseudoDTD::getAllowedElements(QDomDocument *doc, QProgressDialog *progress)
{
      m_elements_list.clear();
      // We only display a list, i.e. we pretend that the content model is just
      // a set, so we use a map. This is necessary e.g. for xhtml 1.0's head element,
      // which would otherwise display some elements twice.
      QMap<QString,bool> subelement_list; // the bool is not used

      QDomNodeList list = doc->elementsByTagName("element");
      uint list_length = list.count();    // speedup (really!)

      for( uint i = 0; i < list_length; i++ ) {
            if( progress->wasCancelled() ) {
                  return false;
            }
            progress->setProgress(progress->progress()+1);
            // FIXME!:
            //qApp->processEvents();

            subelement_list.clear();
            QDomElement elem = list.item(i).toElement();
            if( elem.isNull() ) {
                  continue;
            }

            // Enter the expanded content model, which may also include stuff not allowed.
            // We do not care if it's a <sequence-group> or whatever.
            QDomElement content_model_elem =
                  elem.elementsByTagName("content-model-expanded").item(0).toElement();
            if( ! content_model_elem.isNull() ) {
                  // collect the allowed sub elements:
                  QDomNodeList sub_list = content_model_elem.elementsByTagName("element-name");
                  uint sub_list_length = sub_list.count();
                  for( uint l = 0; l < sub_list_length; l++ ) {
                        QDomElement sub_elem = sub_list.item(l).toElement();
                        if( !sub_elem.isNull() ) {
                              subelement_list[sub_elem.attribute("name")] = true;
                        }
                  }
                  // anders: check if this is an EMPTY element, and put "__EMPTY" in the
                  // sub list, so that we can insert tags in empty form if required.
                  QDomNodeList empty_list = elem.elementsByTagName("empty");
                  if ( empty_list.count() ) {
                        subelement_list["__EMPTY"] = true;
                  }
            }

            // Now remove the elements not allowed (e.g. <a> is explicitly not allowed in <a>
            // in the HTML 4.01 Strict DTD). Sometimes there are no exclusions (e.g. in XML
            // DTDs there are never exclusions):
            QDomNodeList exclusions_list = elem.elementsByTagName("exclusions");
            if( exclusions_list.length() > 0 ) {
                  QDomElement exclusions_elem = exclusions_list.item(0).toElement();
                  if( ! exclusions_elem.isNull() ) {
                        QDomNodeList excluded = exclusions_elem.elementsByTagName("element-name");
                        uint excluded_length = excluded.count();
                        for( uint l = 0; l < excluded_length; l++ ) {
                              QDomElement excluded_elem = excluded.item(l).toElement();
                              if( !excluded_elem.isNull() ) {
                                    // removing a key that is not present is a no-op
                                    subelement_list.remove(excluded_elem.attribute("name"));
                              }
                        }
                  }
            }

            // turn the map into a list:
            QStringList subelement_list_tmp;
            QMap<QString,bool>::Iterator it;
            for( it = subelement_list.begin(); it != subelement_list.end(); ++it ) {
                  subelement_list_tmp.append(it.key());
            }
            m_elements_list.insert(elem.attribute("name"), subelement_list_tmp);

      } // end iteration over all <element> nodes
      return true;
}

/** Look up which elements are allowed inside a parent element. The result is
  * a plain list of names: it says nothing about order or about how many
  * occurrences are allowed. An empty list is returned for an unknown parent.
  *
  * With SGML support enabled the parent name is matched case-insensitively and
  * the first matching entry wins; otherwise the name must match exactly.
  */
QStringList PseudoDTD::getAllowedElementsFast(QString parent_element)
{
      if( ! m_sgml_support ) {
            // exact lookup:
            QMap<QString,QStringList>::Iterator hit = m_elements_list.find(parent_element);
            if( hit == m_elements_list.end() ) {
                  return QStringList();
            }
            return hit.data();
      }

      // scan for the first key that matches when case is ignored:
      const QString wanted = parent_element.lower();
      QMap<QString,QStringList>::Iterator entry = m_elements_list.begin();
      while( entry != m_elements_list.end() ) {
            if( entry.key().lower() == wanted ) {
                  return entry.data();
            }
            ++entry;
      }
      return QStringList();
}

/** Walk over all <attlist> nodes and record, per attlist name, the names of
  * the <attribute> nodes found inside it (stored in m_attributes_list).
  *
  * @param doc      the parsed meta DTD
  * @param progress progress dialog; advanced by one step per <attlist> node
  * @return false if the progress dialog reports cancellation, true otherwise
  */
bool PseudoDTD::getAllowedAttributes(QDomDocument *doc, QProgressDialog *progress)
{
      m_attributes_list.clear();

      QDomNodeList attlists = doc->elementsByTagName("attlist");
      const uint attlist_count = attlists.count();

      for( uint idx = 0; idx < attlist_count; ++idx ) {
            if( progress->wasCancelled() ) {
                  return false;
            }
            progress->setProgress(progress->progress()+1);
            // FIXME!!
            //qApp->processEvents();

            QDomElement attlist_elem = attlists.item(idx).toElement();
            if( attlist_elem.isNull() ) {
                  continue;
            }

            // Collect the names of all <attribute> nodes of this attlist:
            QStringList names;
            QDomNodeList attributes = attlist_elem.elementsByTagName("attribute");
            const uint attribute_count = attributes.count();
            for( uint a = 0; a < attribute_count; ++a ) {
                  QDomElement attribute_elem = attributes.item(a).toElement();
                  if( attribute_elem.isNull() ) {
                        continue;
                  }
                  names.append(attribute_elem.attribute("name"));
            }
            m_attributes_list.insert(attlist_elem.attribute("name"), names);
      }
      return true;
}

/** Look up which attributes are allowed for an element. An empty list is
  * returned if the element is not known.
  *
  * With SGML support enabled the element name is matched case-insensitively
  * and the first matching entry wins; otherwise the name must match exactly.
  */
QStringList PseudoDTD::getAllowedAttributesFast(QString element)
{
      if( ! m_sgml_support ) {
            // exact lookup:
            QMap<QString,QStringList>::Iterator hit = m_attributes_list.find(element);
            if( hit == m_attributes_list.end() ) {
                  return QStringList();
            }
            return hit.data();
      }

      // scan for the first key that matches when case is ignored:
      const QString wanted = element.lower();
      QMap<QString,QStringList>::Iterator entry = m_attributes_list.begin();
      while( entry != m_attributes_list.end() ) {
            if( entry.key().lower() == wanted ) {
                  return entry.data();
            }
            ++entry;
      }
      return QStringList();
}

/** Walk over all <attlist> nodes and record, per attlist name, a map from
  * each attribute name to its list of values (stored in
  * m_attributevalues_list). The values are taken from the "value" attribute
  * of each <attribute> node, split at single spaces.
  *
  * @param doc      the parsed meta DTD
  * @param progress progress dialog; advanced by one step per <attlist> node
  * @return false if the progress dialog reports cancellation, true otherwise
  */
bool PseudoDTD::getAllowedAttributeValues(QDomDocument *doc, QProgressDialog *progress)
{
      // 1 element : n possible attributes
      m_attributevalues_list.clear();

      QDomNodeList attlists = doc->elementsByTagName("attlist");
      const uint attlist_count = attlists.count();

      for( uint idx = 0; idx < attlist_count; ++idx ) {
            if( progress->wasCancelled() ) {
                  return false;
            }
            progress->setProgress(progress->progress()+1);
            // FIXME!
            //qApp->processEvents();

            QDomElement attlist_elem = attlists.item(idx).toElement();
            if( attlist_elem.isNull() ) {
                  continue;
            }

            // 1 attribute : n possible values
            QMap<QString,QStringList> values_by_attribute;
            QDomNodeList attributes = attlist_elem.elementsByTagName("attribute");
            const uint attribute_count = attributes.count();
            for( uint a = 0; a < attribute_count; ++a ) {
                  QDomElement attribute_elem = attributes.item(a).toElement();
                  if( attribute_elem.isNull() ) {
                        continue;
                  }
                  const QString raw_values = attribute_elem.attribute("value");
                  const QStringList values = QStringList::split(QRegExp(" "), raw_values);
                  values_by_attribute.insert(attribute_elem.attribute("name"), values);
            }
            m_attributevalues_list.insert(attlist_elem.attribute("name"), values_by_attribute);
      }
      return true;
}

/** Look up which attribute values are allowed for an attribute in an element
  * (the element is necessary because e.g. "href" inside <a> could be different
  * to an "href" inside <link>). An empty list is returned if no predefined
  * values are available.
  *
  * With SGML support enabled both names are matched case-insensitively, which
  * requires iterating instead of direct access; otherwise both names must
  * match exactly.
  */
QStringList PseudoDTD::getAllowedAttributeValuesFast(QString element, QString attribute)
{
      if( ! m_sgml_support ) {
            // exact lookup of the element, then of the attribute:
            QMap< QString,QMap<QString,QStringList> >::Iterator elem_hit = m_attributevalues_list.find(element);
            if( elem_hit != m_attributevalues_list.end() ) {
                  QMap<QString,QStringList> values_by_attribute = elem_hit.data();
                  QMap<QString,QStringList>::Iterator attr_hit = values_by_attribute.find(attribute);
                  if( attr_hit != values_by_attribute.end() ) {
                        return attr_hit.data();
                  }
            }
            return QStringList();
      }

      const QString wanted_element = element.lower();
      const QString wanted_attribute = attribute.lower();

      QMap< QString,QMap<QString,QStringList> >::Iterator elem_it = m_attributevalues_list.begin();
      for( ; elem_it != m_attributevalues_list.end(); ++elem_it ) {
            if( elem_it.key().lower() != wanted_element ) {
                  continue;
            }
            // The element matches, now look for the attribute. If it is not found
            // the scan goes on with the remaining elements.
            QMap<QString,QStringList> values_by_attribute = elem_it.data();
            QMap<QString,QStringList>::Iterator attr_it = values_by_attribute.begin();
            for( ; attr_it != values_by_attribute.end(); ++attr_it ) {
                  if( attr_it.key().lower() == wanted_attribute ) {
                        return attr_it.data();
                  }
            }
      }
      // no predefined values available:
      return QStringList();
}

/** Walk over all <entity> nodes and record each entity name together with the
  * text of its first <text-expanded> child, e.g. nbsp => &#160; (stored in
  * m_entity_list). Entities whose "type" attribute is "param" are skipped;
  * an entity without <text-expanded> is stored with a null string.
  *
  * @param doc      the parsed meta DTD
  * @param progress progress dialog; advanced by one step per <entity> node
  * @return false if the progress dialog reports cancellation, true otherwise
  */
bool PseudoDTD::getEntities(QDomDocument *doc, QProgressDialog *progress)
{
      m_entity_list.clear();

      QDomNodeList entity_nodes = doc->elementsByTagName("entity");
      const uint entity_count = entity_nodes.count();

      for( uint idx = 0; idx < entity_count; ++idx ) {
            if( progress->wasCancelled() ) {
                  return false;
            }
            progress->setProgress(progress->progress()+1);
            //FIXME!!
            //qApp->processEvents();

            QDomElement entity_elem = entity_nodes.item(idx).toElement();
            // TODO: what's cdata <-> gen ?
            if( entity_elem.isNull() || entity_elem.attribute("type") == "param" ) {
                  continue;
            }

            QString expansion;      // stays null if there is no <text-expanded>
            QDomElement expanded_elem =
                  entity_elem.elementsByTagName("text-expanded").item(0).toElement();
            if( ! expanded_elem.isNull() ) {
                  expansion = expanded_elem.text();
                  // TODO: support more than one &#...; in the expanded text
                  /* TODO include do this when the unicode font problem is solved:
                  if( exp.contains(QRegExp("^&#x[a-zA-Z0-9]+;$")) ) {
                        // hexadecimal numbers, e.g. "&#x236;"
                        uint end = exp.find(";");
                        exp = exp.mid(3, end-3);
                        exp = QChar();
                  } else if( exp.contains(QRegExp("^&#[0-9]+;$")) ) {
                        // decimal numbers, e.g. "&#236;"
                        uint end = exp.find(";");
                        exp = exp.mid(2, end-2);
                        exp = QChar(exp.toInt());
                  }
                  */
            }
            m_entity_list.insert(entity_elem.attribute("name"), expansion);
      }
      return true;
}

/** Get a list of all (non-parameter) entities whose name starts with a
  * certain string.
  *
  * @param start prefix the entity name has to begin with; an empty string
  *              matches every entity
  * @return the names of all matching entities, in the order of m_entity_list
  */
QStringList PseudoDTD::getEntitiesFast(QString start)
{
      QStringList entities;
      QMap<QString,QString>::Iterator it;
      for( it = m_entity_list.begin(); it != m_entity_list.end(); ++it ) {
            // Compare against the key (the entity name). Dereferencing the
            // iterator would yield the expanded text instead (e.g. "&#160;").
            if( it.key().startsWith(start) ) {
                  QString str = it.key();
                  /* TODO: show entities as unicode character
                  if( !it.data().isEmpty() ) {
                        //str += " -- " + it.data();
                        QRegExp re("&#(\\d+);");
                        if( re.search(it.data()) != -1 ) {
                              uint ch = re.cap(1).toUInt();
                              str += " -- " + QChar(ch).decomposition();
                        }
                        //kdDebug() << "#" << it.data() << endl;
                  }
                  */
                  entities.append(str);
                  // TODO: later use a table view
            }
      }
      return entities;
}

// Generated by  Doxygen 1.6.0   Back to index