Mt77: Fichero Fuente comun.cpp

00001 // vim: set expandtab tabstop=8 shiftwidth=8 foldmethod=marker:
00012 #include <set>
00013 #include <iostream>
00014 #include <iomanip>
00015 #include <list>
00016 #include <vector>
00017 #include <fstream>
00018 #include <istream>
00019 #include <sstream>
00020 
00021 #include <sys/types.h>
00022 #include <sys/stat.h>
00023 #include <unistd.h>
00024 #include <err.h>
00025 
00026 // using namespace std;
00027 // La anterior comentada por poco portable
00028 
00029 #include "comun.hpp"
00030 
00031 
/**
 * Splits cad into the pieces separated by delimitador.
 *
 * @param delimitador Separator text; may be longer than one character.
 * @param cad         String to split.
 * @return The pieces in order of appearance. Leading, trailing or consecutive
 *         separators produce empty pieces, and an empty cad produces a single
 *         empty piece. An empty delimitador produces an empty vector.
 */
std::vector<std::string> estalla(const std::string &delimitador, const std::string &cad)
{
        std::vector<std::string> arr;
        if (delimitador.empty())
                return arr;

        std::string::size_type ini = 0;  // start of the piece being collected
        std::string::size_type pos;
        while ((pos = cad.find(delimitador, ini)) != std::string::npos) {
                arr.push_back(cad.substr(ini, pos - ini));
                // Resume right after the separator so matches never overlap.
                ini = pos + delimitador.length();
        }
        // Whatever follows the last separator (possibly nothing) is the final piece.
        arr.push_back(cad.substr(ini));

        return arr;
}
00064 
00065 void verificaNombre(const char *na, char *nrel)
00066 {
00067         ASSERT(na != NULL);
00068         ASSERT(nrel != NULL);
00069         /*      nrel[MAXLURL - 1] = '\0';
00070                 ASSERT(nrel[MAXLURL - 1] == '\0'); */
00071 
00072         if (strcmp(na + (strlen(na) - 7), ".indice") != 0) {
00073                 stringstream ss;
00074                 ss << "Se esperaba extensión .indice de '" << na << "'" ;
00075                 throw ss.str();
00076         }
00077         if (strlen(na) > MAXLURL - 3) {
00078                 stringstream ss;
00079                 ss << "Nombre de archivo excede "
00080                 << MAXLURL - 3 << " caracteres" ;
00081                 throw ss.str();
00082         }
00083         strlcpy(nrel, na, strlen(na) - 6);
00084         strlcat(nrel, ".relacion", MAXLURL);
00085 }
00086 
00088 string
00089 prefijo_comun_mas_largo(string a, string b)
00090 {
00091         int c;
00092         int m = min(min(a.size(), b.size()), (unsigned long)MAXCAD);
00093         for (c = 0 ; c < m && a[c] == b[c] ; c++) {}
00094         return a.substr(0, c);
00095 }
00096 
/**
 * Builds an error message prefixed with the stream position of the character
 * that was read last.
 *
 * @param is Stream being parsed; only its read position is queried.
 * @param m  Description of the problem.
 * @return "<position>: <m>", where position is tellg() - 1 clamped to 0.
 */
std::string errorFormato(std::istream &is, std::string m)
{
        // tellg() reports -1 on a failed stream, so clamp instead of printing
        // a negative offset.
        long posicion = static_cast<long>(is.tellg()) - 1;
        if (posicion < 0L) {
                posicion = 0L;
        }

        std::stringstream salida;
        salida << posicion;
        salida << ": ";
        salida << m;
        return salida.str();
}
00103 
00106 string leeCad(std::istream &is) throw(string)
00107 {
00108         int c = is.get();
00109         string cad = "";
00110         if (c == '\n' || c == EOF) {
00111                 return "";
00112         }
00113         while (c != FINCADENA && c != EOF && cad.size() < MAXCAD) {
00114                 cad += c;
00115                 c = is.get();
00116         }
00117         if (cad.size() >= MAXCAD) {
00118                 cerr << "No se indexó completa palabra larga: " << cad << endl;
00119                 while (c != FINCADENA && c != EOF) {
00120                         c =  is.get();
00121                 }
00122         }
00123         if (c != FINCADENA) {
00124 
00125                 throw errorFormato(is, string("Se esperaba ") + FINCADENA);
00126         }
00127         return cad;
00128 }
00129 
00131 long
00132 leeHex(std::istream &is) throw(std::string)
00133 {
00134         unsigned int i;
00135         long valor = 0;
00136         int c = 0;
00137 
00138         //clog << "OJO leeHex..." << endl;
00139         for (i = 0; i < (int)MAXLHEX; i++) {
00140                 c = is.get();
00141                 //clog << "OJO leeHex i=" << i << ", c=" << (char)c << endl;
00142                 if (c == EOF) {
00143                         std::stringstream ss;
00144                         ss << "Se esperaban " << MAXLHEX << " digitos no " << i;
00145                         throw errorFormato(is, ss.str());
00146                 }
00147                 if ((c < '0' || c > '9') && (c < 'a' || c > 'f')) {
00148                         throw errorFormato(is, "Se esperaba digito hexadecimal");
00149                 }
00150                 valor = valor * 16 + ((c >= '0' && c <= '9') ?
00151                                       c - '0' : c - 'a' + 10);
00152                 //clog << "OJO leeHex valor=" << valor << endl;
00153         }
00154 
00155         return valor;
00156 }
00157 
00158 
00159 /*
00160  * Escribe en os un entero como hexadecimal de 8 digitos.
00161  * @param n Entero, debe ser menor o igual a 0xffffffff
00162  */
00163 void escribeHex(std::ostream &os, unsigned long n)
00164 {
00165         ASSERT(n <= 0xffffffff);
00166         os << setfill('0') << setw(8) << setbase(16) << n ;
00167 }
00168 
00169 
00170 void escribe128b(std::ostream &os, unsigned long n)
00171 {
00172         // clog << "OJO escribe128b n=" << n << endl;
00173         ASSERT(n <= 0xffffffff);
00174         string res = "";
00175         unsigned long c = n;
00176 
00177         for (int i = 0; i<MAXL128B; i++) {
00178                 char r = (c % 128) + '0';
00179                 res = r + res;
00180                 c = c / 128;
00181         }
00182 
00183         os << res;
00184 }
00185 
00186 
00187 long
00188 lee128b(std::istream &is) throw (std::string)
00189 {
00190         int i;
00191         long valor = 0;
00192         int c = 0;
00193 
00194         for (i = 0; i < (int)MAXL128B; i++) {
00195                 c = is.get();
00196                 //clog << "OJO lee128bx c=" << c << endl;
00197                 if (c == EOF) {
00198                         std::stringstream ss;
00199                         ss << "Se esperaban  " << MAXL128B << " digitos, no " << i;
00200                         throw errorFormato(is, ss.str());
00201                 }
00202 
00203                 if (c < '0' || c > ('0' + 127)) {
00204                         throw errorFormato(is, "Se esperaba digito 128b");
00205                 }
00206                 valor = valor * 128 + (c - '0');
00207         }
00208 
00209 
00210         return valor;
00211 }
00212 
00213 
/**
 * Normalizes one character for indexing.
 *
 * Input is taken to be ISO-8859-1 (single-byte), which is what
 * utf8_a_latin1 in this file produces -- confirm against callers. The
 * accented letters are matched by explicit byte value so the result does not
 * depend on the encoding this source file happens to be saved in.
 *
 * @param c Character to normalize.
 * @return A one-character string: A-Z and 0-9 unchanged, a-z in upper case,
 *         accented vowels and u-diaeresis as the plain upper-case vowel, and
 *         n-tilde (either case) as upper-case N-tilde (0xD1). Any other
 *         character yields an empty string.
 */
std::string normalizaCaracter(char c)
{
        std::string o;

        if ((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) {
                o += c;
        } else if (c >= 'a' && c <= 'z') {
                o += static_cast<char>(c - ('a' - 'A'));
        } else {
                switch (static_cast<unsigned char>(c)) {
                case 0xE1:  // a acute
                case 0xC1:  // A acute
                        o += 'A';
                        break;
                case 0xE9:  // e acute
                case 0xC9:  // E acute
                        o += 'E';
                        break;
                case 0xED:  // i acute
                case 0xCD:  // I acute
                        o += 'I';
                        break;
                case 0xF3:  // o acute
                case 0xD3:  // O acute
                        o += 'O';
                        break;
                case 0xFA:  // u acute
                case 0xDA:  // U acute
                case 0xFC:  // u diaeresis
                case 0xDC:  // U diaeresis
                        o += 'U';
                        break;
                case 0xF1:  // n tilde
                case 0xD1:  // N tilde: kept too, so both cases index alike
                        o += '\xD1';
                        break;
                default:
                        // Everything else is ignored.
                        break;
                }
        }
        return o;
}
00263 
// Words that normaliza() maps to the empty string. They are stored in the
// upper-case, accent-free form that normalizaCaracter() produces.
const int tamnoagregan = 40;
std::string noagregan[tamnoagregan] = {
        // Conjunctions
        "Y", "O",
        // Articles
        "EL", "LA", "LOS", "LAS", "UN", "UNA", "UNOS", "UNAS",
        // Prepositions, see http://www.apoyolingua.com/LASPREPOSICIONES.htm
        "A", "ANTE", "BAJO", "CON", "CONTRA", "DE", "DESDE", "DURANTE",
        "EN", "ENTRE", "HACIA", "HASTA", "MEDIANTE", "PARA", "POR",
        "SEGUN", "SIN", "SOBRE", "TRAS",
        // Others
        "QUE", "LE", "LES", "DEL", "AL", "CUANDO", "SU", "SUS", "COMO",
        "O",  // repeats the conjunction above; kept so the table still has tamnoagregan entries
        "MAS"
};
00308 
00309 
00310 
00316 string normaliza(string s)
00317 {
00318 
00319         string::iterator i;
00320         string o = "";
00321         int c;
00322         for (c = 0, i = s.begin(); i != s.end() && c <= (int)MAXCAD;
00323                         c++ , i++) {
00324                 if (*i == '.' && (i+1 != s.end()) &&
00325                                 (normalizaCaracter(*(i+1)) != "")) {
00326                         o += ".";
00327                 } else {
00328                         o += normalizaCaracter(*i);
00329                 }
00330         }
00331 
00332         for (int i = 0; i < tamnoagregan; i++) {
00333                 if (o == noagregan[i]) {
00334                         return string("");
00335                 }
00336         }
00337 
00338         return o;
00339 }
00340 
00341 
00342 int car_utf8_a_latin1(const char *u8, char *latin1)
00343 {
00344         ASSERT(u8 != NULL);
00345         ASSERT(latin1 != NULL);
00346         ASSERT(u8[0] != '\0');
00347         int num;
00348 
00349         /*      clog << "car_utf8_a_latin1(\"" << u8 << "\")" << endl;
00350                 clog << "ascii *u8=" << (unsigned int)(*u8) << endl;
00351                 clog << "*u8 & 0xc0=" << ((unsigned int)*u8 & 0xc0) << endl; */
00352 
00353         /*if (*u8 & 0x80) {
00354         } else*/
00355         if ((((unsigned int)*u8 & 0xc0) == 0xc0) && u8[1] != '\0') {
00356                 *latin1 = u8[0] << 6 | (u8[1] & 0x3f);
00357                 num = 2;
00358         } else {
00359                 *latin1 = *u8;
00360                 num = 1;
00361         }
00362 
00363         //clog << "car_utf8_a_latin1 por salir con num=" << num << " y *latin1 ='" << *latin1 << "'" << endl;
00364         return num;
00365 }
00366 
00367 string utf8_a_latin1(const char *u8, int len)
00368 {
00369         ASSERT(u8 != NULL);
00370         ASSERT(len >= 0);
00371         string r = "";
00372 
00373         /*clog << "utf8_a_latin1(\"" << u8 << "\", "<< len << ")" << endl;*/
00374         char *i = (char *)u8;
00375         while (*i != '\0' && i - u8 < len) {
00376                 char latin1;
00377                 int nb = car_utf8_a_latin1(i, &latin1);
00378                 i += nb;
00379                 r += latin1;
00380                 //clog << "OJO tras iteracion i = '" << i << "', r='" << r << "'" << endl;
00381         }
00382         r += "";
00383         return r;
00384 }
00385 
00386 
/**
 * Creates a fresh temporary directory with mkdtemp() from the template
 * "/tmp/leeXXXXXX".
 *
 * @return Path of the directory that was created.
 * @throws std::string holding strerror(errno) when mkdtemp() fails.
 */
std::string
directorio_temp()
{
        // mkdtemp() rewrites the trailing XXXXXX in place, so the template
        // has to live in a writable buffer.
        char plantilla[100] = "/tmp/leeXXXXXX";

        if (mkdtemp(plantilla) == NULL) {
                throw std::string(strerror(errno));
        }
        return std::string(plantilla);
}
00401 
00402 
00403