rfc2047.c (5882B)
1 /* 2 * Copyright (C) 1996-8 Michael R. Elkins <me@cs.hmc.edu> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,, USA. 17 */ 18 19 /* $Id: rfc2047.c,v 1.5 2007-10-28 16:33:36 roland Exp $ */ 20 21 #include <ctype.h> 22 #include <string.h> 23 #ifdef HAVE_ICONV 24 #include <iconv.h> 25 #include <errno.h> 26 #include <limits.h> 27 #endif 28 29 #include "rfc822.h" 30 #include "rfc2047.h" 31 #include "helpers.h" 32 33 enum 34 { 35 ENCOTHER, 36 ENC7BIT, 37 ENC8BIT, 38 ENCQUOTEDPRINTABLE, 39 ENCBASE64, 40 ENCBINARY 41 }; 42 43 const char MimeSpecials[] = "@.,;<>[]\\\"()?/="; 44 const char *Charset = "utf-8"; /* default charset */ 45 46 47 int Index_hex[128] = { 48 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 49 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 50 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 51 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1, 52 -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, 53 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 54 -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, 55 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1 56 }; 57 58 int Index_64[128] = { 59 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 60 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 61 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63, 62 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1, 63 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, 64 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1, 65 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 66 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1 67 }; 68 69 70 #define IsPrint(c) (isprint((unsigned char)(c)) || \ 71 ((unsigned char)(c) >= 0xa0)) 72 73 #define hexval(c) Index_hex[(unsigned int)(c)] 74 #define base64val(c) Index_64[(unsigned int)(c)] 75 76 static int rfc2047_decode_word (char *d, const char *s, size_t dlen) 77 { 78 char *p = safe_strdup (s); 79 char *pp = p; 80 char *pd = d; 81 size_t len = dlen; 82 int enc = 0, filter = 0, count = 0, c1, c2, c3, c4; 83 #ifdef HAVE_ICONV 84 char *fromcharset; 85 iconv_t cd; 86 size_t in; 87 #endif 88 89 while ((pp = strtok (pp, "?")) != NULL) 90 { 91 count++; 92 switch (count) 93 { 94 case 2: 95 if (strcasecmp (pp, Charset) != 0) 96 { 97 filter = 1; 98 #ifdef HAVE_ICONV 99 fromcharset = safe_strdup (pp); 100 #endif 101 } 102 break; 103 case 3: 104 if (toupper (*pp) == 'Q') 105 enc = ENCQUOTEDPRINTABLE; 106 else if (toupper (*pp) == 'B') 107 enc = ENCBASE64; 108 else 109 return (-1); 110 break; 111 case 4: 112 if (enc == ENCQUOTEDPRINTABLE) 113 { 114 while (*pp && len > 0) 115 { 116 if (*pp == '_') 117 { 118 *pd++ = ' '; 119 len--; 120 } 121 else if (*pp == '=') 122 { 123 *pd++ = (hexval(pp[1]) << 4) | hexval(pp[2]); 124 len--; 125 pp += 2; 126 } 127 else 128 { 129 *pd++ = *pp; 130 len--; 131 } 132 pp++; 133 } 134 *pd = 0; 135 } 136 else if (enc == ENCBASE64) 137 { 138 while (*pp && len > 0) 139 { 140 c1 = base64val(pp[0]); 141 c2 = base64val(pp[1]); 142 *pd++ = (c1 << 2) | ((c2 >> 4) & 0x3); 143 if (--len == 0) break; 144 145 if (pp[2] == '=') break; 146 147 c3 = base64val(pp[2]); 148 *pd++ = ((c2 & 0xf) << 4) | ((c3 >> 2) & 0xf); 149 if (--len == 0) 150 break; 151 152 if (pp[3] == '=') 153 break; 154 155 c4 = base64val(pp[3]); 156 *pd++ = ((c3 & 0x3) << 6) | c4; 157 if (--len == 0) 158 break; 159 160 pp += 4; 161 } 162 *pd = 0; 163 } 164 break; 165 } 166 pp = 0; 167 } 168 safe_free (&p); 169 if (filter) 170 { 171 #ifdef HAVE_ICONV 172 if ((cd = iconv_open (Charset, fromcharset)) == (iconv_t)(-1)) 173 { 174 #endif 175 pd = d; 176 while (*pd) 177 { 178 if (!IsPrint (*pd)) 179 *pd = '?'; 180 pd++; 181 } 182 #ifdef HAVE_ICONV 183 } else { 184 p = safe_strdup (d); 185 pp = p; 186 in = strlen (d) + 1; 187 pd = d; 188 /* maximum available buffer length for converted string */ 189 len = dlen; 190 while (*pd && iconv (cd, &pp, &in, &pd, &len) == (size_t)(-1)) 191 { 192 if (errno == E2BIG) 193 break; 194 195 *pd = '?'; 196 pp++; 197 in--; 198 pd++; 199 len--; 200 } 201 iconv (cd, NULL, NULL, &pd, &len); 202 iconv_close (cd); 203 safe_free (&p); 204 } 205 safe_free (&fromcharset); 206 #endif 207 } 208 return (0); 209 } 210 211 /* try to decode anything that looks like a valid RFC2047 encoded 212 * header field, ignoring RFC822 parsing rules 213 */ 214 void rfc2047_decode (char *d, const char *s, size_t dlen) 215 { 216 const char *p, *q; 217 size_t n; 218 int found_encoded = 0; 219 220 dlen--; /* save room for the terminal nul */ 221 222 while (*s && dlen > 0) 223 { 224 if ((p = strstr (s, "=?")) == NULL || 225 (q = strchr (p + 2, '?')) == NULL || 226 (q = strchr (q + 1, '?')) == NULL || 227 (q = strstr (q + 1, "?=")) == NULL) 228 { 229 /* no encoded words */ 230 if (d != s) 231 strfcpy (d, s, dlen + 1); 232 return; 233 } 234 235 if (p != s) 236 { 237 n = (size_t) (p - s); 238 /* ignore spaces between encoded words */ 239 if (!found_encoded || strspn (s, " \t\r\n") != n) 240 { 241 if (n > dlen) 242 n = dlen; 243 if (d != s) 244 memcpy (d, s, n); 245 d += n; 246 dlen -= n; 247 } 248 } 249 250 rfc2047_decode_word (d, p, dlen); 251 found_encoded = 1; 252 s = q + 2; 253 n = strlen (d); 254 dlen -= n; 255 d += n; 256 } 257 *d = 0; 258 } 259 260 void rfc2047_decode_adrlist (ADDRESS *a) 261 { 262 while (a) 263 { 264 if (a->personal && strstr (a->personal, "=?") != NULL) 265 rfc2047_decode (a->personal, a->personal, strlen (a->personal) + 1); 266 a = a->next; 267 } 268 }