Sample program which extracts all text from a Word document and dumps it as HTML.
#include <stdio.h>
#include <time.h>
#ifdef WIN32
#include <crtdbg.h>
#endif
/* Handle an element named "t" in the WordprocessingML main namespace:
 * copy the reader's current value to stdout one character at a time,
 * replacing the HTML metacharacters '<', '>' and '&' with their entities so
 * that document text can never be interpreted as markup in the output.
 * NOTE(review): the close of this element block is not visible in this
 * excerpt -- confirm against the full file. */
mce_start_element(reader, _X("http://schemas.openxmlformats.org/wordprocessingml/2006/main"), _X("t")) {
    /* xmlTextReaderConstValue() can return NULL when the current node has no
     * value, so check the pointer before dereferencing it. */
    for (const xmlChar *txt = xmlTextReaderConstValue(reader->reader);
         NULL != txt && 0 != *txt; txt++) {
        switch (*txt) {
        case '<':
            fputs("&lt;", stdout);
            break;
        case '>':
            fputs("&gt;", stdout);
            break;
        case '&':
            fputs("&amp;", stdout);
            break;
        default:
            /* Every other byte is passed through unchanged. */
            putc(*txt, stdout);
            break;
        }
    }
/* Handle an element named "p" in the WordprocessingML main namespace:
 * whatever the recursive dumpText() call prints for it is wrapped in an
 * HTML <p>...</p> pair, followed by a newline. */
mce_start_element(reader, _X(
"http://schemas.openxmlformats.org/wordprocessingml/2006/main"), _X(
"p")) {
printf("<p>");
dumpText(reader);
printf("</p>\n");
/* NOTE(review): second dumpText() call immediately after the paragraph is
 * closed; presumably it belongs to a separate element handler whose opening
 * line is not visible here -- confirm against the full file. */
dumpText(reader);
}
/**
 * Program entry point: reads the "/word/document.xml" part through an XML
 * reader and writes its text to stdout wrapped in a minimal HTML page that
 * declares UTF-8 as its character set.
 *
 * @param argc  argument count (not referenced in the code visible here)
 * @param argv  argument vector (not referenced in the code visible here)
 * @return 0 in all cases, including when nothing could be opened; no output
 *         is produced in that case.
 *
 * NOTE(review): `c` and `reader` are used below but their declarations are
 * not visible in this excerpt -- confirm they are set up (and released)
 * elsewhere in the full file.
 */
int main( int argc, const char* argv[] )
{
#ifdef WIN32
    /* Enable the MSVC debug heap's allocation tracking and leak report. */
    _CrtSetDbgFlag (_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
#endif
    if (NULL!=c) {
        if (OPC_ERROR_NONE==opcXmlReaderOpen(c, &reader, _X("/word/document.xml"), NULL, 0, 0)) {
            /* Page prologue. */
            printf("<html>\n");
            printf("<head>\n");
            printf("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n");
            printf("</head>\n");
            printf("<body>\n");
            dumpText(&reader);
            /* Page epilogue: close <body> (previously a second opening tag
             * was emitted here, leaving the document malformed). */
            printf("</body>\n");
            printf("</html>\n");
        }
    }
#ifdef WIN32
#endif
    return 0;
}