DuiLib 源码分析之解析xml类CMarkup & CMarkupNode cpp文件

时隔5个月才有时间接着写未完成的实现部分,也是惭愧呀

选几个关键的函数来解析,一些get方法就忽略掉吧

CMarkupNode 与 CMarkup 互为友元类,CMarkup 实现解析,CMarkupNode 用于存储读取节点数据

 // Builds the attribute table (m_aAttributes / m_nAttributes) for the element
 // at m_iPos by walking the already-parsed XML buffer owned by m_pOwner.
 //
 // The scanned range is [iStart, iData) of the element. Every name and value
 // is recorded as an offset from m_pOwner->m_pstrXML rather than as a copy.
 // At most MAX_XML_ATTRIBUTES entries are recorded.
 void CMarkupNode::_MapAttributes()
 {
     m_nAttributes = 0;
     LPCTSTR pstr = m_pOwner->m_pstrXML + m_pOwner->m_pElements[m_iPos].iStart;
     LPCTSTR pstrEnd = m_pOwner->m_pstrXML + m_pOwner->m_pElements[m_iPos].iData;
     // Step over the NUL-terminated tag name.
     pstr += _tcslen(pstr) + 1;
     while( pstr < pstrEnd ) {
         m_pOwner->_SkipWhitespace(pstr);
         // Offset of the attribute name.
         m_aAttributes[m_nAttributes].iName = pstr - m_pOwner->m_pstrXML;
         // Step over the NUL-terminated attribute name.
         pstr += _tcslen(pstr) + 1;
         m_pOwner->_SkipWhitespace(pstr);
         // A value must start with a double quote; stop mapping otherwise.
         if( *pstr++ != _T('\"') ) return;
         // Offset of the attribute value (first character after the quote).
         m_aAttributes[m_nAttributes++].iValue = pstr - m_pOwner->m_pstrXML;
         if( m_nAttributes >= MAX_XML_ATTRIBUTES ) return;
         // Step over the NUL-terminated attribute value.
         pstr += _tcslen(pstr) + 1;
     }
 }

这个函数的主要作用是将已经处理过的xml文件进行数据分割保存,这里保存的属性名和属性值都是xml在内存中的位移,最大属性支持64个

接下来详细说明CMarkup类

有几个用于加载xml文件的函数:

bool CMarkup::Load(LPCTSTR pstrXML)//直接解析字符串

bool CMarkup::LoadFromMem(BYTE* pByte, DWORD dwSize, int encoding)//将二进制数据流转换为字符串再解析

bool CMarkup::LoadFromFile(LPCTSTR pstrFilename, int encoding)//解析xml文件,根据文件名解析,先判断资源是否被打包到zip压缩包中

 // Parse entry point: reserves the first element slot, clears both error
 // buffers, then parses m_pstrXML starting with parent index 0.
 // Returns the result of the recursive _Parse overload.
 bool CMarkup::_Parse()
 {
     _ReserveElement(); // Reserve index 0 for errors
     ::ZeroMemory(m_szErrorMsg, sizeof(m_szErrorMsg));
     ::ZeroMemory(m_szErrorXML, sizeof(m_szErrorXML));
     LPTSTR pstrXML = m_pstrXML;
     return _Parse(pstrXML, 0);
 }
 // Hands out the next free XMLELEMENT slot, growing the element array first
 // when it is full. Returns a pointer to the slot and increments m_nElements.
 //
 // NOTE(review): the numeric literals were missing from this listing; the
 // values used here (0, 2, 500) follow upstream DuiLib UIMarkup.cpp - confirm.
 CMarkup::XMLELEMENT* CMarkup::_ReserveElement()
 {
     if( m_nElements == 0 ) m_nReservedElements = 0;
     if( m_nElements >= m_nReservedElements ) {
         // Grow by half the current capacity plus a fixed increment.
         m_nReservedElements += (m_nReservedElements / 2) + 500;
         // realloc carries the existing elements over to the new allocation.
         m_pElements = static_cast<XMLELEMENT*>(realloc(m_pElements, m_nReservedElements * sizeof(XMLELEMENT)));
     }
     return &m_pElements[m_nElements++];
 }
 // Parses a run of sibling elements starting at pstrText and links them under
 // iParent, recursing for child elements. The buffer is modified in place:
 // tag starts, name ends and closing markers are overwritten with NUL.
 //
 // pstrText  in/out cursor into the XML buffer; advanced past what was parsed.
 // iParent   index (into m_pElements) of the enclosing element.
 // Returns true on success or when a closing tag / end of text is reached;
 // otherwise returns whatever _Failed() returns, or false from a sub-parser.
 //
 // NOTE(review): numeric literals were missing from this listing; the values
 // used here follow upstream DuiLib UIMarkup.cpp - confirm `iParent <= 1`.
 bool CMarkup::_Parse(LPTSTR& pstrText, ULONG iParent)
 {
     _SkipWhitespace(pstrText);
     ULONG iPrevious = 0;
     for( ; ; )
     {
         // Exit: end of text reached while at the top of the tree.
         if( *pstrText == _T('\0') && iParent <= 1 ) return true;
         _SkipWhitespace(pstrText);
         if( *pstrText != _T('<') ) return _Failed(_T("Expected start tag"), pstrText);
         // "</" belongs to the caller's element; let the caller consume it.
         if( pstrText[1] == _T('/') ) return true;
         *pstrText++ = _T('\0');
         _SkipWhitespace(pstrText);
         // Skip comment (ends with "->") or processing directive (ends with "?>")
         if( *pstrText == _T('!') || *pstrText == _T('?') ) {
             TCHAR ch = *pstrText;
             if( *pstrText == _T('!') ) ch = _T('-');
             while( *pstrText != _T('\0') && !(*pstrText == ch && *(pstrText + 1) == _T('>')) ) pstrText = ::CharNext(pstrText);
             if( *pstrText != _T('\0') ) pstrText += 2;
             _SkipWhitespace(pstrText);
             continue;
         }
         _SkipWhitespace(pstrText);
         // Fill out element structure
         XMLELEMENT* pEl = _ReserveElement();
         ULONG iPos = pEl - m_pElements;
         pEl->iStart = pstrText - m_pstrXML;
         pEl->iParent = iParent;
         pEl->iNext = pEl->iChild = 0;
         // Link as next sibling of the previous element, or as first child.
         if( iPrevious != 0 ) m_pElements[iPrevious].iNext = iPos;
         else if( iParent > 0 ) m_pElements[iParent].iChild = iPos;
         iPrevious = iPos;
         // Parse name
         LPCTSTR pstrName = pstrText;
         _SkipIdentifier(pstrText);
         LPTSTR pstrNameEnd = pstrText;
         if( *pstrText == _T('\0') ) return _Failed(_T("Error parsing element name"), pstrText);
         // Parse attributes
         if( !_ParseAttributes(pstrText) ) return false;
         _SkipWhitespace(pstrText);
         if( pstrText[0] == _T('/') && pstrText[1] == _T('>') )
         {
             // Self-closing element "/>": record where the element ends.
             pEl->iData = pstrText - m_pstrXML;
             *pstrText = _T('\0');
             pstrText += 2;
         }
         else
         {
             // Element closed with a plain ">".
             if( *pstrText != _T('>') ) return _Failed(_T("Expected start-tag closing"), pstrText);
             // Parse node data
             pEl->iData = ++pstrText - m_pstrXML;
             LPTSTR pstrDest = pstrText;
             // Consume text content up to the next '<'.
             if( !_ParseData(pstrText, pstrDest, _T('<')) ) return false;
             // Determine type of next element
             if( *pstrText == _T('\0') && iParent <= 1 ) return true;
             if( *pstrText != _T('<') ) return _Failed(_T("Expected end-tag start"), pstrText);
             if( pstrText[0] == _T('<') && pstrText[1] != _T('/') )
             {
                 // A child element follows: parse it recursively.
                 if( !_Parse(pstrText, iPos) ) return false;
             }
             if( pstrText[0] == _T('<') && pstrText[1] == _T('/') )
             {
                 // Closing tag "</name>": terminate the data and verify the name.
                 *pstrDest = _T('\0');
                 *pstrText = _T('\0');
                 pstrText += 2;
                 _SkipWhitespace(pstrText);
                 SIZE_T cchName = pstrNameEnd - pstrName;
                 if( _tcsncmp(pstrText, pstrName, cchName) != 0 ) return _Failed(_T("Unmatched closing tag"), pstrText);
                 pstrText += cchName;
                 _SkipWhitespace(pstrText);
                 if( *pstrText++ != _T('>') ) return _Failed(_T("Unmatched closing tag"), pstrText);
             }
         }
         // Terminate the element name only now, after the closing-tag compare.
         *pstrNameEnd = _T('\0');
         _SkipWhitespace(pstrText);
     }
 }
 // Advances pstr past characters in the range (NUL, ' '] - i.e. blanks and
 // control characters - stopping at NUL or the first other character.
 void CMarkup::_SkipWhitespace(LPCTSTR& pstr) const
 {
     while( *pstr > _T('\0') && *pstr <= _T(' ') ) pstr = ::CharNext(pstr);
 }

 // Same as above for a mutable cursor.
 void CMarkup::_SkipWhitespace(LPTSTR& pstr) const
 {
     while( *pstr > _T('\0') && *pstr <= _T(' ') ) pstr = ::CharNext(pstr);
 }

 // Advances pstr past identifier characters: '_', ':' and anything accepted
 // by _istalnum. Stops at NUL or the first other character.
 void CMarkup::_SkipIdentifier(LPCTSTR& pstr) const
 {
     // Names are expected to be plain English identifiers, so this test suffices.
     while( *pstr != _T('\0') && (*pstr == _T('_') || *pstr == _T(':') || _istalnum(*pstr)) ) pstr = ::CharNext(pstr);
 }

 // Same as above for a mutable cursor.
 void CMarkup::_SkipIdentifier(LPTSTR& pstr) const
 {
     // Names are expected to be plain English identifiers, so this test suffices.
     while( *pstr != _T('\0') && (*pstr == _T('_') || *pstr == _T(':') || _istalnum(*pstr)) ) pstr = ::CharNext(pstr);
 }

 // Parses the attribute list of a start tag, rewriting the buffer in place so
 // that each name and each value becomes a NUL-terminated string.
 //
 // pstrText  in/out cursor, positioned just after the element name on entry;
 //           left on '>', '/' or NUL on success.
 // Returns true on success; on a syntax error returns the result of _Failed().
 bool CMarkup::_ParseAttributes(LPTSTR& pstrText)
 {
     if( *pstrText == _T('>') ) return true;
     // Terminate the element name.
     *pstrText++ = _T('\0');
     _SkipWhitespace(pstrText);
     while( *pstrText != _T('\0') && *pstrText != _T('>') && *pstrText != _T('/') ) {
         // Skip the attribute name.
         _SkipIdentifier(pstrText);
         LPTSTR pstrIdentifierEnd = pstrText;
         _SkipWhitespace(pstrText);
         if( *pstrText != _T('=') ) return _Failed(_T("Error while parsing attributes"), pstrText);
         // Overwrite '=' with a blank, then terminate the attribute name.
         *pstrText++ = _T(' ');
         *pstrIdentifierEnd = _T('\0');
         _SkipWhitespace(pstrText);
         // Values must be enclosed in double quotes.
         if( *pstrText++ != _T('\"') ) return _Failed(_T("Expected attribute value"), pstrText);
         LPTSTR pstrDest = pstrText;
         // Decode the value up to the closing quote.
         if( !_ParseData(pstrText, pstrDest, _T('\"')) ) return false;
         if( *pstrText == _T('\0') ) return _Failed(_T("Error while parsing attribute string"), pstrText);
         // Terminate the (possibly shortened) value.
         *pstrDest = _T('\0');
         // If decoding shortened the value, blank out the stale closing quote.
         if( pstrText != pstrDest ) *pstrText = _T(' ');
         pstrText++;
         _SkipWhitespace(pstrText);
     }
     return true;
 }

 // Copies text from pstrText to pstrDest (both point into the same buffer)
 // until cEnd or NUL is reached, decoding entities via _ParseMetaChar and,
 // unless m_bPreserveWhitespace is set, collapsing whitespace that follows a
 // space.
 //
 // pstrText  in/out read cursor; left on cEnd or NUL.
 // pstrDest  in/out write cursor; left one past the last character written.
 // cEnd      terminating character ('<' for node data, '"' for attributes).
 // Always returns true.
 bool CMarkup::_ParseData(LPTSTR& pstrText, LPTSTR& pstrDest, char cEnd)
 {
     while( *pstrText != _T('\0') && *pstrText != cEnd ) {
         if( *pstrText == _T('&') ) {
             while( *pstrText == _T('&') ) {
                 // Decode entities such as &quot;
                 _ParseMetaChar(++pstrText, pstrDest);
             }
             if (*pstrText == cEnd)
                 break;
         }

         if( *pstrText == _T(' ') ) {
             *pstrDest++ = *pstrText++;
             if( !m_bPreserveWhitespace ) _SkipWhitespace(pstrText);
         }
         else {
             // Copy one full character (CharNext may span several TCHARs).
             LPTSTR pstrTemp = ::CharNext(pstrText);
             while( pstrText < pstrTemp) {
                 *pstrDest++ = *pstrText++;
             }
         }
     }
     // Make sure that MapAttributes() works correctly when it parses
     // over a value that has been transformed.
     // The gap left by decoded entities is filled with blanks; the slot at
     // pstrDest itself is skipped here.
     LPTSTR pstrFill = pstrDest + 1;
     while( pstrFill < pstrText ) *pstrFill++ = _T(' ');
     return true;
 }

 // Decodes one entity whose leading '&' has already been consumed.
 // Recognises amp; lt; gt; quot; apos; - writes the decoded character to
 // pstrDest and advances pstrText past the entity. For anything else a
 // literal '&' is written and pstrText is left untouched.
 void CMarkup::_ParseMetaChar(LPTSTR& pstrText, LPTSTR& pstrDest)
 {
     if( pstrText[0] == _T('a') && pstrText[1] == _T('m') && pstrText[2] == _T('p') && pstrText[3] == _T(';') ) {
         *pstrDest++ = _T('&');
         pstrText += 4;
     }
     else if( pstrText[0] == _T('l') && pstrText[1] == _T('t') && pstrText[2] == _T(';') ) {
         *pstrDest++ = _T('<');
         pstrText += 3;
     }
     else if( pstrText[0] == _T('g') && pstrText[1] == _T('t') && pstrText[2] == _T(';') ) {
         *pstrDest++ = _T('>');
         pstrText += 3;
     }
     else if( pstrText[0] == _T('q') && pstrText[1] == _T('u') && pstrText[2] == _T('o') && pstrText[3] == _T('t') && pstrText[4] == _T(';') ) {
         *pstrDest++ = _T('\"');
         pstrText += 5;
     }
     else if( pstrText[0] == _T('a') && pstrText[1] == _T('p') && pstrText[2] == _T('o') && pstrText[3] == _T('s') && pstrText[4] == _T(';') ) {
         *pstrDest++ = _T('\'');
         pstrText += 5;
     }
     else {
         *pstrDest++ = _T('&');
     }
 }

解析xml的基本原理就是:将xml加载到内存中,顺序解析节点,首先对节点进行存储,并对xml进行原地改写(将<、>、/、"、'等改写为'\0'或空格),获取节点属性的时候再进行分割存储。

简单举个例子会更清晰:

 <?xml version="1.0" encoding="utf-8"?>
<Window size="800,572" sizebox="4,4,6,6" roundcorner="5,5" caption="0,0,0,90" mininfo="800,570">
<Font name="宋体" size="13" bold="true" />
<VerticalLayout bkcolor="#FFD1E8F5" bkcolor2="#FFC6E0F1" bordercolor="#FF768D9B" bordersize="1" borderround="5,5" inset="1,0,1,0">
<HorizontalLayout>
<Container width="22" height="22" bkimage="file='icon.png' source='0,0,16,16' dest='5,4,21,20' " />
<Text text="360安全卫士7.3" pos="22, 5, 200, 24" float="true" textcolor="#FF447AA1" font="0" />
</HorizontalLayout>
</VerticalLayout>
</Window>

比如解析上述xml文件,解析之后内存中的内容大致如下(\0 表示字符串结束符):

 \0Window\0size\0 800,572\0 sizebox\0 4,4,6,6\0 roundcorner\0 \05,5\0 caption\0 0,0,0,90\0 mininfo\0 800,570\0>
\0Font\0name\0 宋体\0 size\0 13\0 bold\0 true\0 \0>
\0VerticalLayout\0bkcolor\0 #FFD1E8F5\0 bkcolor2\0 #FFC6E0F1\0 bordercolor\0 #FF768D9B\0 bordersize\0 1\0 borderround\0 5,5\0 inset\0 1,0,1,0\0>
\0HorizontalLayout\0>
\0Container\0 width\0 22\0 height\0 22\0 bkimage\0 file\0' icon.png' source='0,0,16,16' dest='5,4,21,20' \0 \0>
\0Text\0 text\0 360安全卫士7.3\0 pos\0 22, 5, 200, 24\0 float\0 true\0 textcolor\0 #FF447AA1\0 font\0 0\0 \0>
\0\0HorizontalLayout>
\0\0VerticalLayout>
\0\0Window>
 // Builds the attribute table (m_aAttributes / m_nAttributes) for the element
 // at m_iPos by walking the already-parsed XML buffer owned by m_pOwner.
 //
 // The scanned range is [iStart, iData) of the element. Every name and value
 // is recorded as an offset from m_pOwner->m_pstrXML rather than as a copy.
 // At most MAX_XML_ATTRIBUTES entries are recorded.
 void CMarkupNode::_MapAttributes()
 {
     m_nAttributes = 0;
     LPCTSTR pstr = m_pOwner->m_pstrXML + m_pOwner->m_pElements[m_iPos].iStart;
     LPCTSTR pstrEnd = m_pOwner->m_pstrXML + m_pOwner->m_pElements[m_iPos].iData;
     // Step over the NUL-terminated tag name.
     pstr += _tcslen(pstr) + 1;
     while( pstr < pstrEnd ) {
         m_pOwner->_SkipWhitespace(pstr);
         // Offset of the attribute name.
         m_aAttributes[m_nAttributes].iName = pstr - m_pOwner->m_pstrXML;
         // Step over the NUL-terminated attribute name.
         pstr += _tcslen(pstr) + 1;
         m_pOwner->_SkipWhitespace(pstr);
         // A value must start with a double quote; stop mapping otherwise.
         if( *pstr++ != _T('\"') ) return;
         // Offset of the attribute value (first character after the quote).
         m_aAttributes[m_nAttributes++].iValue = pstr - m_pOwner->m_pstrXML;
         if( m_nAttributes >= MAX_XML_ATTRIBUTES ) return;
         // Step over the NUL-terminated attribute value.
         pstr += _tcslen(pstr) + 1;
     }
 }

然后看获取属性的函数就一目了然了

上一篇:SQL语句执行性能


下一篇:JNI动态注册native方法及JNI数据使用