00001
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <err.h>
#include <string>
#include <iostream>
#include <fstream>
#include <libxml/xmlmemory.h>
#include <libxml/debugXML.h>
#include <libxml/HTMLtree.h>
#include <libxml/xmlIO.h>

#include <libxml/xinclude.h>
#include <libxml/catalog.h>
#include <libxslt/xslt.h>
#include <libxslt/xsltInternals.h>
#include <libxslt/transform.h>
#include <libxslt/xsltutils.h>


#include "funzipuno.hpp"
#include "NodoTrieS.hpp"
#include "comun.hpp"
00038 #include "leeODT.hpp"
00039
00040 using namespace std;
00041
00051 void
00052 aplicaXSLT(string dt, string arch, string nomsal)
00053 {
00054
00055 FILE *f = NULL;
00056 string nx = dt + "/convierte.xslt";
00057
00058 f = fopen(nx.c_str(), "w+");
00059 fprintf(f, "<!DOCTYPE xsl:stylesheet [\n"
00060 "]>\n"
00061 "<xsl:stylesheet version=\"1.0\"\n"
00062 " xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"\n"
00063 " xmlns:office=\"urn:oasis:names:tc:opendocument:xmlns:office:1.0\"\n"
00064 " xmlns:style=\"urn:oasis:names:tc:opendocument:xmlns:style:1.0\"\n"
00065 " xmlns:text=\"urn:oasis:names:tc:opendocument:xmlns:text:1.0\"\n"
00066 " xmlns:table=\"urn:oasis:names:tc:opendocument:xmlns:table:1.0\"\n"
00067 " xmlns:draw=\"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0\"\n"
00068 " xmlns:fo=\"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0\"\n"
00069 " xmlns:xlink=\"http://www.w3.org/1999/xlink\"\n"
00070 " xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n"
00071 " xmlns:meta=\"urn:oasis:names:tc:opendocument:xmlns:meta:1.0\"\n"
00072 " xmlns:number=\"urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0\"\n"
00073 " xmlns:svg=\"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0\"\n"
00074 " xmlns:chart=\"urn:oasis:names:tc:opendocument:xmlns:chart:1.0\"\n"
00075 " xmlns:dr3d=\"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0\"\n"
00076 " xmlns:math=\"http://www.w3.org/1998/Math/MathML\"\n"
00077 " xmlns:form=\"urn:oasis:names:tc:opendocument:xmlns:form:1.0\"\n"
00078 " xmlns:script=\"urn:oasis:names:tc:opendocument:xmlns:script:1.0\"\n"
00079 " xmlns:ooo=\"http://openoffice.org/2004/office\"\n"
00080 " xmlns:ooow=\"http://openoffice.org/2004/writer\"\n"
00081 " xmlns:oooc=\"http://openoffice.org/2004/calc\"\n"
00082 " xmlns:dom=\"http://www.w3.org/2001/xml-events\"\n"
00083 " xmlns:xforms=\"http://www.w3.org/2002/xforms\"\n"
00084 " xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"\n"
00085 " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
00086 " exclude-result-prefixes=\"office style text table draw fo xlink dc meta number svg chart dr3d math form script ooo ooow oooc dom xforms xsd xsi\">\n"
00087 "\n"
00088 "<xsl:output method=\"text\" encoding=\"ISO-8859-1\" />\n"
00089 "\n"
00090 "<xsl:template match=\"office:document-contet\">\n"
00091 " <xsl:apply-templates select=\".//office:body\"/>\n"
00092 "</xsl:template>\n"
00093 "\n"
00094 "<xsl:template match=\"office:body\"> \n"
00095 " <xsl:apply-templates/>\n"
00096 "</xsl:template>\n"
00097 "\n"
00098 "<xsl:template match=\"text:p\">\n"
00099 " <xsl:apply-templates/>\n"
00100 "</xsl:template>\n"
00101 "\n"
00102 "<xsl:template match=\"text()|@*\">\n"
00103 " <xsl:value-of select=\".\"/><xsl:text>\n"
00104 "</xsl:text>\n"
00105 "</xsl:template>\n"
00106 "\n"
00107 "<xsl:template match=\"text:h|text:index-title-template|text:table-of-content|text:user-index|office:automatic-styles|office:font-face-decls\">\n"
00108 "</xsl:template>\n"
00109 "\n"
00110 "</xsl:stylesheet>\n"
00111 );
00112 fclose(f);
00113
00114 int i;
00115 const char *params[16 + 1];
00116 int nbparams = 0;
00117 xsltStylesheetPtr cur = NULL;
00118 xmlDocPtr doc, res;
00119
00120 xmlSubstituteEntitiesDefault(1);
00121 xmlLoadExtDtdDefaultValue = 1;
00122 cur = xsltParseStylesheetFile((const xmlChar *)nx.c_str());
00123 doc = xmlParseFile(arch.c_str());
00124 res = xsltApplyStylesheet(cur, doc, NULL);
00125 if (nomsal == "") {
00126 xsltSaveResultToFile(stdout, res, cur);
00127 } else {
00128 FILE *s=fopen(nomsal.c_str(), "w");
00129 if (s == NULL) {
00130 stringstream ss;
00131 ss << "No puede abrir " << nomsal ;
00132 throw ss.str();
00133 }
00134 xsltSaveResultToFile(s, res, cur);
00135 fclose(s);
00136 }
00137 xsltFreeStylesheet(cur);
00138 xmlFreeDoc(res);
00139 xmlFreeDoc(doc);
00140 xsltCleanupGlobals();
00141 xmlCleanupParser();
00142 }
00143
00153 void
00154 extraezip(string odt, string dt)
00155 {
00156
00157 char nomout[PATH_MAX];
00158 FILE *in, *out;
00159
00160 if ((in = fopen(odt.c_str(), "rb")) == (FILE *)NULL) {
00161 stringstream ss;
00162 ss << "No puede abrir " << odt ;
00163 throw ss.str();
00164 }
00165 snprintf(nomout, PATH_MAX, "%s/mimetype", dt.c_str());
00166 if ((out = fopen(nomout, "wb")) == (FILE *)NULL) {
00167 stringstream ss;
00168 ss << "No puede escribir " << nomout;
00169 throw ss.str();
00170 }
00171 unzipUno(in, "mimetype", out);
00172 fclose(out);
00173
00174 fseek(in, 0, SEEK_SET);
00175 snprintf(nomout, PATH_MAX, "%s/content.xml", dt.c_str());
00176 if ((out = fopen(nomout, "wb")) == (FILE *)NULL) {
00177 stringstream ss;
00178 ss << "No puede escribir " << nomout ;
00179 throw ss.str();
00180 }
00181 unzipUno(in, "content.xml", out);
00182 fclose(out);
00183
00184 fclose(in);
00185 }
00186
00187
00192 string prepara(string odt)
00193 {
00194
00195 string dt = directorio_temp();
00196
00197 extraezip(odt, dt);
00198 string mime = dt + string("/mimetype");
00199 fstream is(mime.c_str(), ios_base::in);
00200 char tipo[100];
00201 is.getline(tipo, 100);
00202 if (strcmp(tipo, "application/vnd.oasis.opendocument.text") != 0) {
00203 throw "Sólo puede convertir textos OpenDocument" ;
00204 }
00205 return dt;
00206
00207 }
00208
00210 void leeODT(const char *na, long ndoc, NodoTrieS &t, bool normalizaPal)
00211 {
00212 ASSERT(na!=NULL && na[0] != '\0' && strlen(na)<FILENAME_MAX);
00213 ASSERT(ndoc >= 0);
00214
00215
00216
00217 string dt = prepara(string(na));
00218
00219 string ns = dt + string("/salida.txt");
00220
00221 aplicaXSLT(dt, dt + "/content.xml", ns);
00222 leeTexto(ns.c_str(), ndoc, t, normalizaPal);
00223 unlink(string(dt + "/mimetype").c_str());
00224 unlink(string(dt + "/content.xml").c_str());
00225 unlink(string(dt + "/salida.txt").c_str());
00226 unlink(string(dt + "/convierte.xslt").c_str());
00227 rmdir(dt.c_str());
00228 }
00229