leeODT.cpp

Ir a la documentación de este archivo.
00001 // vim: set expandtab tabstop=8 shiftwidth=8 foldmethod=marker:
00014 #include <sys/types.h>
00015 #include <sys/stat.h>
00016 #include <stdlib.h>
00017 #include <stdio.h>
00018 #include <err.h>
00019 #include <string>
00020 #include <iostream>
00021 #include <fstream>
00022 #include <libxml/xmlmemory.h>
00023 #include <libxml/debugXML.h>
00024 #include <libxml/HTMLtree.h>
00025 #include <libxml/xmlIO.h>
00026 //#include <libxml/DOCBparser.h>
00027 #include <libxml/xinclude.h>
00028 #include <libxml/catalog.h>
00029 #include <libxslt/xslt.h>
00030 #include <libxslt/xsltInternals.h>
00031 #include <libxslt/transform.h>
00032 #include <libxslt/xsltutils.h>
00033 
00034 
00035 #include "funzipuno.hpp"
00036 #include "NodoTrieS.hpp"
00037 #include "comun.hpp"
00038 #include "leeODT.hpp"
00039 
00040 using namespace std;
00041 
00051 void
00052 aplicaXSLT(string dt, string arch, string nomsal)
00053 {
00054         //cerr << "OJO aplicaXSLT(" << dt << ", " << arch << ", " << nomsal << ")" << endl;
00055         FILE *f = NULL;
00056         string nx = dt + "/convierte.xslt";
00057         //cerr << "OJO nx=" << nx << endl;
00058         f = fopen(nx.c_str(), "w+");
00059         fprintf(f, "<!DOCTYPE xsl:stylesheet [\n"
00060                 "]>\n"
00061                 "<xsl:stylesheet version=\"1.0\"\n"
00062                 "       xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"\n"
00063                 "       xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\"\n"
00064                 "       xmlns:style=\"urn:oasis:names:tc:opendocument:xmlns:style:1.0\"\n"
00065                 "       xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\"\n"
00066                 "       xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\"\n"
00067                 "       xmlns:draw=\"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0\"\n"
00068                 "       xmlns:fo=\"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0\"\n"
00069                 "       xmlns:xlink=\"http://www.w3.org/1999/xlink\"\n"
00070                 "       xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n"
00071                 "       xmlns:meta=\"urn:oasis:names:tc:opendocument:xmlns:meta:1.0\"\n"
00072                 "       xmlns:number=\"urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0\"\n"
00073                 "       xmlns:svg=\"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0\"\n"
00074                 "       xmlns:chart=\"urn:oasis:names:tc:opendocument:xmlns:chart:1.0\"\n"
00075                 "       xmlns:dr3d=\"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0\"\n"
00076                 "       xmlns:math=\"http://www.w3.org/1998/Math/MathML\"\n"
00077                 "       xmlns:form=\"urn:oasis:names:tc:opendocument:xmlns:form:1.0\"\n"
00078                 "       xmlns:script=\"urn:oasis:names:tc:opendocument:xmlns:script:1.0\"\n"
00079                 "       xmlns:ooo=\"http://openoffice.org/2004/office\"\n"
00080                 "       xmlns:ooow=\"http://openoffice.org/2004/writer\"\n"
00081                 "       xmlns:oooc=\"http://openoffice.org/2004/calc\"\n"
00082                 "       xmlns:dom=\"http://www.w3.org/2001/xml-events\"\n"
00083                 "       xmlns:xforms=\"http://www.w3.org/2002/xforms\"\n"
00084                 "       xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n"
00085                 "       xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
00086                 "       exclude-result-prefixes=\"office style text table draw fo xlink dc meta number svg chart dr3d math form script ooo ooow oooc dom xforms xsd xsi\">\n"
00087                 "\n"
00088                 "<xsl:output method=\"text\" encoding=\"ISO-8859-1\" />\n"
00089                 "\n"
00090                 "<xsl:template match=\"office:document-contet\">\n"
00091                 "       <xsl:apply-templates select=\".//office:body\"/>\n"
00092                 "</xsl:template>\n"
00093                 "\n"
00094                 "<xsl:template match=\"office:body\"> \n"
00095                 "       <xsl:apply-templates/>\n"
00096                 "</xsl:template>\n"
00097                 "\n"
00098                 "<xsl:template match=\"text:p\">\n"
00099                 "       <xsl:apply-templates/>\n"
00100                 "</xsl:template>\n"
00101                 "\n"
00102                 "<xsl:template match=\"text()|@*\">\n"
00103                 "    <xsl:value-of select=\".\"/><xsl:text>\n"
00104                 "</xsl:text>\n"
00105                 "</xsl:template>\n"
00106                 "\n"
00107                 "<xsl:template match=\"text:h|text:index-title-template|text:table-of-content|text:user-index|office:automatic-styles|office:font-face-decls\">\n"
00108                 "</xsl:template>\n"
00109                 "\n"
00110                 "</xsl:stylesheet>\n"
00111                );
00112         fclose(f);
00113         /* Porcion tomada de http://xmlsoft.org/XSLT/tutorial/libxslttutorial.html */
00114         int i;
00115         const char *params[16 + 1];
00116         int nbparams = 0;
00117         xsltStylesheetPtr cur = NULL;
00118         xmlDocPtr doc, res;
00119 
00120         xmlSubstituteEntitiesDefault(1);
00121         xmlLoadExtDtdDefaultValue = 1;
00122         cur = xsltParseStylesheetFile((const xmlChar *)nx.c_str());
00123         doc = xmlParseFile(arch.c_str());
00124         res = xsltApplyStylesheet(cur, doc, NULL);
00125         if (nomsal == "") {
00126                 xsltSaveResultToFile(stdout, res, cur);
00127         } else {
00128                 FILE *s=fopen(nomsal.c_str(), "w");
00129                 if (s == NULL) {
00130                         stringstream ss;
00131                         ss << "No puede abrir " << nomsal ;
00132                         throw ss.str();
00133                 }
00134                 xsltSaveResultToFile(s, res, cur);
00135                 fclose(s);
00136         }
00137         xsltFreeStylesheet(cur);
00138         xmlFreeDoc(res);
00139         xmlFreeDoc(doc);
00140         xsltCleanupGlobals();
00141         xmlCleanupParser();
00142 }
00143 
00153 void
00154 extraezip(string odt, string dt)
00155 {
00156         //cerr << "OJO extraezip(" << odt << ", " << dt << ")" << endl;
00157         char nomout[PATH_MAX];
00158         FILE *in, *out;
00159 
00160         if ((in = fopen(odt.c_str(), "rb")) == (FILE *)NULL) {
00161                 stringstream ss;
00162                 ss << "No puede abrir " << odt ;
00163                 throw ss.str();
00164         }
00165         snprintf(nomout, PATH_MAX, "%s/mimetype", dt.c_str());
00166         if ((out = fopen(nomout, "wb")) == (FILE *)NULL) {
00167                 stringstream ss;
00168                 ss << "No puede escribir " << nomout;
00169                 throw ss.str();
00170         }
00171         unzipUno(in, "mimetype", out);
00172         fclose(out);
00173 
00174         fseek(in, 0, SEEK_SET);
00175         snprintf(nomout, PATH_MAX, "%s/content.xml", dt.c_str());
00176         if ((out = fopen(nomout, "wb")) == (FILE *)NULL) {
00177                 stringstream ss;
00178                 ss << "No puede escribir " << nomout ;
00179                 throw ss.str();
00180         }
00181         unzipUno(in, "content.xml", out);
00182         fclose(out);
00183 
00184         fclose(in);
00185 }
00186 
00187 
00192 string prepara(string odt)
00193 {
00194         //cerr << "OJO prepara(" << odt << ")" << endl;
00195         string dt = directorio_temp();
00196         //cerr << "OJO prepara. dt=" << dt << endl;
00197         extraezip(odt, dt);
00198         string mime = dt + string("/mimetype");
00199         fstream is(mime.c_str(), ios_base::in);
00200         char tipo[100];
00201         is.getline(tipo, 100);
00202         if (strcmp(tipo, "application/vnd.oasis.opendocument.text") != 0) {
00203                 throw "Sólo puede convertir textos OpenDocument" ;
00204         }
00205         return dt;
00206         //xsltproc  $md/odtatexto.xsl /var/tmp/odtatexto/content.xml > $s
00207 }
00208 
00210 void leeODT(const char *na, long ndoc, NodoTrieS &t, bool normalizaPal)
00211 {
00212         ASSERT(na!=NULL && na[0] != '\0' && strlen(na)<FILENAME_MAX);
00213         ASSERT(ndoc >= 0);
00214 
00215         //cerr << "OJO leeODT(" << na << ", " << ndoc << ", t)" << endl;
00216 
00217         string dt = prepara(string(na));
00218         // cerr << "OJO dt = " << dt << endl;
00219         string ns = dt + string("/salida.txt");
00220         // cerr << "OJO ns = " << ns << endl;
00221         aplicaXSLT(dt, dt + "/content.xml", ns);
00222         leeTexto(ns.c_str(), ndoc, t, normalizaPal);
00223         unlink(string(dt + "/mimetype").c_str());
00224         unlink(string(dt + "/content.xml").c_str());
00225         unlink(string(dt + "/salida.txt").c_str());
00226         unlink(string(dt + "/convierte.xslt").c_str());
00227         rmdir(dt.c_str());
00228 }
00229 

Generado el Wed Jan 6 06:58:22 2010 para Mt77 por  doxygen 1.5.4