Qt中三种解析xml的方式

在下面的随笔中,我会根据xml的结构,给出Qt中解析这个xml的三种方式的代码。虽然这些代码是通过调用Qt的函数实现的,但是很多开源的C++解析xml的库,甚至很多其他语言解析xml的库,都和下面三种解析方式采用相同的原理,所以就算你不是学习Qt,也可以大致参看一下代码,对三种解析方式有一个大致的感觉。

先给出xml如下:

<?xml version="1.0" encoding="utf-8"?>
<school>
<teacher>
<entry name="Job">
<age>30</age>
<sport>soccer</sport>
</entry>
<entry name="Tom">
<age>32</age>
<sport>swimming</sport>
</entry>
</teacher>
<student>
<entry name="Lily">
<age>20</age>
<sport>dancing</sport>
</entry>
<entry name="Keith">
<age>21</age>
<sport>running</sport>
</entry>
</student>
</school>

下面给出qt中解析xml的三种方式,通过解析xml,创建student列表和teacher列表。先给出存储的结构体和辅助函数:

#ifndef SCHOOLDEFINE_H
#define SCHOOLDEFINE_H

#include <ostream>
#include <string>
#include <utility>

// Plain data records shared by the XML parsers, plus helpers that print them.
// The include guard matters because every parser header includes this file,
// so a translation unit using two parsers would otherwise see the structs twice.
namespace School
{

// One teacher: the entry's name, age and favorite sport.
struct Teacher
{
    std::string name;
    int age;
    std::string loveSport;

    // The string arguments are taken by value and moved into the members.
    Teacher(std::string name_, int age_, std::string loveSport_)
        : name(std::move(name_)), age(age_), loveSport(std::move(loveSport_))
    { }
};

// One student: the entry's name, age and favorite sport.
struct Student
{
    std::string name;
    int age;
    std::string loveSport;

    // The string arguments are taken by value and moved into the members.
    Student(std::string name_, int age_, std::string loveSport_)
        : name(std::move(name_)), age(age_), loveSport(std::move(loveSport_))
    { }
};

// Writes a three-line description of `teacher` to `out`.
inline void print(std::ostream &out, const Teacher& teacher)
{
    out << "teacher: " << teacher.name << std::endl;
    out << "\tage: " << teacher.age << std::endl;
    out << "\tfavorite sport: " << teacher.loveSport << std::endl;
}

// Writes a three-line description of `student` to `out`.
inline void print(std::ostream& out, const Student& student)
{
    out << "student: " << student.name << std::endl;
    out << "\tage: " << student.age << std::endl;
    out << "\tfavorite sport: " << student.loveSport << std::endl;
}

} // namespace School

#endif // SCHOOLDEFINE_H

另外需要注意在.pro中添加

QT += xml

(1)通过QXmlStreamReader:

#include <QXmlStreamReader>
#include "schooldefine.h" class XmlStreamReader
{
public:
XmlStreamReader(); bool readFile(const QString& fileName);
void printAllMembers(); private:
void readSchoolMembers();
void readTeacherMembers();
void readTeacher(const QStringRef& teacherName);
void readStudentMembers();
void readStudent(const QStringRef& studentName);
void skipUnknownElement(); QXmlStreamReader reader; std::vector<School::Teacher> m_teachers;
std::vector<School::Student> m_students;
};
#include "XmlStreamReader.h"
#include <QFile>
#include <iostream>
#include <QDebug>

XmlStreamReader::XmlStreamReader()
{
}

// Opens `fileName`, parses it as a <school> document and fills the teacher
// and student lists. Returns true on success; on failure prints the reason
// to std::cerr and returns false.
bool XmlStreamReader::readFile(const QString &fileName)
{
    QFile file(fileName);
    if (!file.open(QFile::ReadOnly | QFile::Text))
    {
        std::cerr << "Error: Cannot read file " << qPrintable(fileName)
                  << ": " << qPrintable(file.errorString())
                  << std::endl;
        return false;
    }
    reader.setDevice(&file);

    reader.readNext();
    while (!reader.atEnd())
    {
        if (reader.isStartElement())
        {
            if (reader.name() == "school")
            {
                readSchoolMembers();
            }
            else
            {
                reader.raiseError(QObject::tr("Not a school file"));
            }
        }
        else
        {
            reader.readNext();
        }
    }

    file.close();

    // Record the parse outcome, then detach the member reader from `file`:
    // `file` is a local object, so the reader must not keep pointing at it
    // after this function returns.
    const bool parseFailed = reader.hasError();
    const QString parseError = reader.errorString();
    reader.clear();

    if (parseFailed)
    {
        std::cerr << "Error: Failed to parse file "
                  << qPrintable(fileName) << ": "
                  << qPrintable(parseError) << std::endl;
        return false;
    }
    else if (file.error() != QFile::NoError)
    {
        std::cerr << "Error: Cannot read file " << qPrintable(fileName)
                  << ": " << qPrintable(file.errorString())
                  << std::endl;
        return false;
    }
    return true;
}

// Prints all collected teachers, then all collected students, to std::cout.
void XmlStreamReader::printAllMembers()
{
    std::cout << "All teachers: " << std::endl;
    for (const auto& teacher : m_teachers)
    {
        School::print(std::cout, teacher);
    }
    std::cout << "All students: " << std::endl;
    for (const auto& student : m_students)
    {
        School::print(std::cout, student);
    }
}

// Handles the children of <school>: dispatches <teacher> and <student>
// and skips any other element. Returns after consuming </school>.
void XmlStreamReader::readSchoolMembers()
{
    reader.readNext();
    while (!reader.atEnd())
    {
        if (reader.isEndElement())
        {
            reader.readNext();
            break;
        }

        if (reader.isStartElement())
        {
            if (reader.name() == "teacher")
            {
                readTeacherMembers();
            }
            else if (reader.name() == "student")
            {
                readStudentMembers();
            }
            else
            {
                skipUnknownElement();
            }
        }
        else
        {
            reader.readNext();
        }
    }
}

// Handles the children of <teacher>: every <entry> becomes one teacher,
// any other element is skipped. Returns after consuming </teacher>.
void XmlStreamReader::readTeacherMembers()
{
    reader.readNext();
    while (!reader.atEnd())
    {
        if (reader.isEndElement())
        {
            reader.readNext();
            break;
        }

        if (reader.isStartElement())
        {
            if (reader.name() == "entry")
            {
                readTeacher(reader.attributes().value("name"));
            }
            else
            {
                skipUnknownElement();
            }
        }
        else
        {
            reader.readNext();
        }
    }
}

// Reads the <age> and <sport> children of one teacher <entry> and appends
// the resulting teacher to m_teachers. Missing fields stay 0 / empty.
void XmlStreamReader::readTeacher(const QStringRef& teacherName)
{
    // Copy the name first: the reference points into attribute data owned
    // by the caller's temporary.
    const std::string name = teacherName.toString().toStdString();

    reader.readNext();

    int age = 0;
    std::string favoriteSport;

    while (!reader.atEnd())
    {
        if (reader.isEndElement())
        {
            reader.readNext();
            break;
        }

        if (reader.isStartElement())
        {
            if (reader.name() == "age")
            {
                age = reader.readElementText().toInt();
                // readElementText() stops on </age>; step past it.
                if (reader.isEndElement())
                {
                    reader.readNext();
                }
            }
            else if (reader.name() == "sport")
            {
                favoriteSport = reader.readElementText().toStdString();
                // readElementText() stops on </sport>; step past it.
                if (reader.isEndElement())
                {
                    reader.readNext();
                }
            }
            else
            {
                // skipUnknownElement() already moves past the element's end
                // tag, so no extra readNext() here (it would drop a token).
                skipUnknownElement();
            }
        }
        else
        {
            reader.readNext();
        }
    }

    m_teachers.emplace_back(name, age, favoriteSport);
}

// Handles the children of <student>: every <entry> becomes one student,
// any other element is skipped. Returns after consuming </student>.
void XmlStreamReader::readStudentMembers()
{
    reader.readNext();
    while (!reader.atEnd())
    {
        if (reader.isEndElement())
        {
            reader.readNext();
            break;
        }

        if (reader.isStartElement())
        {
            if (reader.name() == "entry")
            {
                readStudent(reader.attributes().value("name"));
            }
            else
            {
                skipUnknownElement();
            }
        }
        else
        {
            reader.readNext();
        }
    }
}

// Reads the <age> and <sport> children of one student <entry> and appends
// the resulting student to m_students. Missing fields stay 0 / empty.
void XmlStreamReader::readStudent(const QStringRef &studentName)
{
    // Copy the name first: the reference points into attribute data owned
    // by the caller's temporary.
    const std::string name = studentName.toString().toStdString();

    reader.readNext();

    int age = 0;
    std::string favoriteSport;

    while (!reader.atEnd())
    {
        if (reader.isEndElement())
        {
            reader.readNext();
            break;
        }

        if (reader.isStartElement())
        {
            if (reader.name() == "age")
            {
                age = reader.readElementText().toInt();
                // readElementText() stops on </age>; step past it.
                if (reader.isEndElement())
                {
                    reader.readNext();
                }
            }
            else if (reader.name() == "sport")
            {
                favoriteSport = reader.readElementText().toStdString();
                // readElementText() stops on </sport>; step past it.
                if (reader.isEndElement())
                {
                    reader.readNext();
                }
            }
            else
            {
                // skipUnknownElement() already moves past the element's end
                // tag, so no extra readNext() here (it would drop a token).
                skipUnknownElement();
            }
        }
        else
        {
            reader.readNext();
        }
    }

    m_students.emplace_back(name, age, favoriteSport);
}

// Skips the current element and everything nested inside it. Returns with
// the reader positioned just after the element's end tag.
void XmlStreamReader::skipUnknownElement()
{
    reader.readNext();
    while (!reader.atEnd())
    {
        if (reader.isEndElement())
        {
            reader.readNext();
            break;
        }

        if (reader.isStartElement())
        {
            skipUnknownElement();
        }
        else
        {
            reader.readNext();
        }
    }
}

(2)通过DOM方式:

#ifndef DOMPARSER_H
#define DOMPARSER_H

#include <vector>

#include <QString>
#include <QDomElement>
#include "schooldefine.h"

// Reads a school XML file through QDomDocument and collects the teachers
// and students it finds.
class DomParser
{
public:
    DomParser();

    // Parses `fileName`; returns false (after printing a message to
    // std::cerr) when the file cannot be opened, is not well-formed, or its
    // root element is not <school>.
    bool readFile(const QString &fileName);

    // Prints every collected teacher and student to std::cout.
    void printAllMembers();

private:
    // Each helper receives the DOM element named in its own name.
    void parseSchoolMembers(const QDomElement &element);
    void parseTeacherMembers(const QDomElement &element);
    void parseStudentMembers(const QDomElement &element);
    void parseTeacher(const QDomElement &element);
    void parseStudent(const QDomElement &element);

    std::vector<School::Teacher> m_teachers;
    std::vector<School::Student> m_students;
};

#endif // DOMPARSER_H
#include "domparser.h"
#include <QDomDocument>
#include <QFile>
#include <iostream>

DomParser::DomParser()
{
}

// Loads `fileName` into a QDomDocument, checks that the root element is
// <school> and walks the tree to fill the teacher and student lists.
// Returns true on success; on failure prints the reason to std::cerr and
// returns false.
bool DomParser::readFile(const QString &fileName)
{
    QFile file(fileName);
    if (!file.open(QFile::ReadOnly | QFile::Text))
    {
        std::cerr << "Error: Cannot read file " << qPrintable(fileName)
                  << ": " << qPrintable(file.errorString())
                  << std::endl;
        return false;
    }

    QString errorStr;
    int errorLine = 0;
    int errorColumn = 0;

    QDomDocument doc;
    if (!doc.setContent(&file, false, &errorStr, &errorLine, &errorColumn))
    {
        std::cerr << "Error: Parse error at line " << errorLine << ", "
                  << "column " << errorColumn << ": "
                  << qPrintable(errorStr) << std::endl;
        return false;
    }

    QDomElement root = doc.documentElement();
    if (root.tagName() != "school")
    {
        std::cerr << "Error: Not a school file" << std::endl;
        return false;
    }

    parseSchoolMembers(root);
    return true;
}

// Prints all collected teachers, then all collected students, to std::cout.
void DomParser::printAllMembers()
{
    std::cout << "All teachers: " << std::endl;
    for (const auto& teacher : m_teachers)
    {
        School::print(std::cout, teacher);
    }
    std::cout << "All students: " << std::endl;
    for (const auto& student : m_students)
    {
        School::print(std::cout, student);
    }
}

// Walks the children of <school> and dispatches <teacher> / <student>;
// every other node is ignored.
void DomParser::parseSchoolMembers(const QDomElement &element)
{
    QDomNode child = element.firstChild();
    while (!child.isNull())
    {
        // toElement() yields a null element (empty tag name) for non-element
        // nodes, so those fall through both comparisons.
        const QDomElement childElement = child.toElement();
        if (childElement.tagName() == "teacher")
        {
            parseTeacherMembers(childElement);
        }
        else if (childElement.tagName() == "student")
        {
            parseStudentMembers(childElement);
        }
        child = child.nextSibling();
    }
}

// Walks the children of <teacher> and parses every <entry> as a teacher.
void DomParser::parseTeacherMembers(const QDomElement &element)
{
    QDomNode child = element.firstChild();
    while (!child.isNull())
    {
        const QDomElement childElement = child.toElement();
        if (childElement.tagName() == "entry")
        {
            parseTeacher(childElement);
        }
        child = child.nextSibling();
    }
}

// Walks the children of <student> and parses every <entry> as a student.
void DomParser::parseStudentMembers(const QDomElement &element)
{
    QDomNode child = element.firstChild();
    while (!child.isNull())
    {
        const QDomElement childElement = child.toElement();
        if (childElement.tagName() == "entry")
        {
            parseStudent(childElement);
        }
        child = child.nextSibling();
    }
}

// Builds one teacher from an <entry> element: the "name" attribute plus the
// text of its <age> and <sport> children. The children are looked up by tag
// name rather than by position, so their order and any extra sibling nodes
// do not matter; a missing child yields age 0 / an empty sport.
void DomParser::parseTeacher(const QDomElement &element)
{
    const QDomElement ageElement = element.firstChildElement("age");
    const QDomElement sportElement = element.firstChildElement("sport");
    const int age = ageElement.text().toInt();

    m_teachers.emplace_back(element.attribute("name").toStdString(),
                            age, sportElement.text().toStdString());
}

// Builds one student from an <entry> element: the "name" attribute plus the
// text of its <age> and <sport> children. The children are looked up by tag
// name rather than by position, so their order and any extra sibling nodes
// do not matter; a missing child yields age 0 / an empty sport.
void DomParser::parseStudent(const QDomElement &element)
{
    const QDomElement ageElement = element.firstChildElement("age");
    const QDomElement sportElement = element.firstChildElement("sport");
    const int age = ageElement.text().toInt();

    m_students.emplace_back(element.attribute("name").toStdString(),
                            age, sportElement.text().toStdString());
}

(3)通过QXmlSimpleReader方式,也就是回调函数方式:

#include <QXmlDefaultHandler>
#include "schooldefine.h" class SaxHandler : public QXmlDefaultHandler
{
public:
SaxHandler(); bool readFile(const QString &fileName);
void printAllMembers(); protected:
bool startElement(const QString &namespaceURI,
const QString &localName,
const QString &qName,
const QXmlAttributes &atts) override;
bool endElement(const QString &namespaceURL,
const QString &localName,
const QString &qName) override;
bool characters(const QString &ch) override;
bool fatalError(const QXmlParseException &exception) override; private:
bool m_isStudent = false;
QString m_currentContext;
std::vector<School::Teacher> m_teachers;
std::vector<School::Student> m_students;
};
#include "saxhandler.h"
#include <QFile>
#include <QXmlInputSource>
#include <QXmlSimpleReader>
#include <iostream>

SaxHandler::SaxHandler()
{
}

// Opens `fileName` and parses it with QXmlSimpleReader, with this object
// installed as content and error handler. Returns false (after printing a
// message to std::cerr) if the file cannot be opened; otherwise returns the
// result of the parse.
bool SaxHandler::readFile(const QString &fileName)
{
    QFile file(fileName);
    // Open explicitly so an unreadable file is reported the same way the
    // other two parsers report it.
    if (!file.open(QFile::ReadOnly | QFile::Text))
    {
        std::cerr << "Error: Cannot read file " << qPrintable(fileName)
                  << ": " << qPrintable(file.errorString())
                  << std::endl;
        return false;
    }

    QXmlInputSource inputSource(&file);
    QXmlSimpleReader reader;
    reader.setContentHandler(this);
    reader.setErrorHandler(this);
    return reader.parse(inputSource);
}

// Prints all collected teachers, then all collected students, to std::cout.
void SaxHandler::printAllMembers()
{
    std::cout << "All teachers: " << std::endl;
    for (const auto& teacher : m_teachers)
    {
        School::print(std::cout, teacher);
    }
    std::cout << "All students: " << std::endl;
    for (const auto& student : m_students)
    {
        School::print(std::cout, student);
    }
}

// Start-tag callback. <teacher>/<student> select which list later entries
// go to; <entry> appends a new record carrying the "name" attribute;
// <age>/<sport> reset the text buffer so it holds only their own content.
bool SaxHandler::startElement(const QString & /*namespaceURI*/,
                              const QString & /*localName*/,
                              const QString &qName,
                              const QXmlAttributes &atts)
{
    if (qName == "teacher")
    {
        m_isStudent = false;
    }
    else if (qName == "student")
    {
        m_isStudent = true;
    }
    else if (qName == "entry")
    {
        // Age and sport are filled in later by endElement().
        if (m_isStudent)
        {
            m_students.emplace_back(atts.value("name").toStdString(), 0, "");
        }
        else
        {
            m_teachers.emplace_back(atts.value("name").toStdString(), 0, "");
        }
    }
    else if (qName == "age" || qName == "sport")
    {
        m_currentContext.clear();
    }
    return true;
}

// Character-data callback: appends the chunk to the text buffer.
bool SaxHandler::characters(const QString &ch)
{
    m_currentContext += ch;
    return true;
}

// End-tag callback. On </age> and </sport> the buffered text is stored in
// the most recently added student or teacher. The buffer is cleared after
// every end tag.
bool SaxHandler::endElement(const QString & /*namespaceURL*/,
                            const QString & /*localName*/,
                            const QString &qName)
{
    if (qName == "age")
    {
        const int age = m_currentContext.toInt();
        // The empty() checks guard back() when <age> appears before any <entry>.
        if (m_isStudent)
        {
            if (!m_students.empty())
            {
                m_students.back().age = age;
            }
        }
        else if (!m_teachers.empty())
        {
            m_teachers.back().age = age;
        }
    }
    else if (qName == "sport")
    {
        const std::string sport = m_currentContext.toStdString();
        // The empty() checks guard back() when <sport> appears before any <entry>.
        if (m_isStudent)
        {
            if (!m_students.empty())
            {
                m_students.back().loveSport = sport;
            }
        }
        else if (!m_teachers.empty())
        {
            m_teachers.back().loveSport = sport;
        }
    }
    m_currentContext.clear();
    return true;
}

// Error callback: prints the position and message of the parse error to
// std::cerr and returns false.
bool SaxHandler::fatalError(const QXmlParseException &exception)
{
    std::cerr << "Parse error at line " << exception.lineNumber()
              << ", " << "column " << exception.columnNumber() << ": "
              << qPrintable(exception.message()) << std::endl;
    return false;
}

下面简单对上述三种方式予以说明:

(1) 从代码行数来看,采用DOM和QXmlSimpleReader的方式,代码行数比较少,而QXmlStreamReader代码行数较多。

(2) 从代码逻辑分析来看,采用DOM方式最容易理解,采用QXmlStreamReader的方式稍微难理解一些,而采用QXmlSimpleReader由于使用了较多的回调,引入了大量的类数据成员,使得代码会很难理解。

(3) 从内存占用来看,DOM的方式会耗费最多的内存,因为需要一次性将所有的内容构建成树;QXmlStreamReader和QXmlSimpleReader对内存要求都较低。

(4) 从运行时间消耗来看,DOM的消耗,可能会稍微大一些,因为DOM正常要经历2次的遍历,一次遍历构建树,一次遍历,构建自己需要的数据。而QXmlSimpleReader和QXmlStreamReader正常只需要遍历一次。

(5) 从处理异常来看,DOM和QXmlStreamReader应该会更容易一些,因为不涉及回调函数,但是对于xml来说,很多时候主要确认内容正确与否,如果错误就退出,查看xml中的错误。当然,这个也是比较重要的项。

对于我来说,因为大多数情况下,解析的xml不是很大,而且基本只涉及加载过程中,所以使用DOM的情况比较多。如果xml比较大,或者调用比较频繁,可以考虑使用QXmlStreamReader的方式。

上一篇:浅谈学习selenium的一些知识点的总结


下一篇:Hive三种不同的数据导出的方式