rfc822.c (13497B)
1 /* Mixmaster version 3.0 -- (C) 1999 - 2006 Anonymizer Inc. and others. 2 3 Mixmaster may be redistributed and modified under certain conditions. 4 This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF 5 ANY KIND, either express or implied. See the file COPYRIGHT for 6 details. 7 8 Parse RFC 822 headers 9 $Id: rfc822.c 934 2006-06-24 13:40:39Z rabbi $ */ 10 11 12 #include "mix3.h" 13 14 static int is_specials(int c); 15 static int is_qtext(char c); 16 static int is_ctext(char c); 17 static void wsc(BUFFER *in, BUFFER *xomment); 18 static int word(BUFFER *in, BUFFER *word, BUFFER *x); 19 static int atom(BUFFER *in, BUFFER *atom, BUFFER *x); 20 static int quoted_string(BUFFER *in, BUFFER *string, BUFFER *x); 21 static int comment(BUFFER *in, BUFFER *string); 22 static int local_part(BUFFER *in, BUFFER *addr, BUFFER *x); 23 static int domain(BUFFER *in, BUFFER *domain, BUFFER *x); 24 static int sub_domain(BUFFER *in, BUFFER *sub, BUFFER *x); 25 static int domain_ref(BUFFER *in, BUFFER *dom, BUFFER *x); 26 static int domain_literal(BUFFER *in, BUFFER *dom, BUFFER *x); 27 static int addr_spec(BUFFER *in, BUFFER *addr, BUFFER *x); 28 static int route_addr(BUFFER *in, BUFFER *addr, BUFFER *x); 29 static int phrase(BUFFER *in, BUFFER *phr, BUFFER *x); 30 static int mailbox(BUFFER *in, BUFFER *mailbox, BUFFER *name, BUFFER *x); 31 static int group(BUFFER *in, BUFFER *group, BUFFER *name, BUFFER *x); 32 33 static void backtrack(BUFFER *b, int len) 34 { 35 if (b) { 36 b->length = len; 37 b->data[b->length] = '\0'; 38 } 39 } 40 41 /* white space and comments */ 42 static void wsc(BUFFER *in, BUFFER *string) 43 { 44 int c; 45 46 for (;;) { 47 c = buf_getc(in); 48 if (c == -1) 49 break; 50 else if (c == '\n') { 51 c = buf_getc(in); 52 if (c != ' ' && c != '\t') { 53 if (c != -1) 54 buf_ungetc(in), buf_ungetc(in); 55 break; 56 } 57 } else { 58 if (c != ' ' && c != '\t') { 59 buf_ungetc(in); 60 if (!comment(in, string)) 61 break; 62 } 63 } 64 } 65 } 66 67 /* specials = "(" / ")" / "<" / ">" / "@" ; Must be in quoted- 68 * / "," / ";" / ":" / "\" / <"> ; string, to use 69 * / "." / "[" / "]" ; within a word. 70 */ 71 72 static int is_specials(int c) 73 { 74 return (c == '(' || c == ')' || c == '<' || c == '>' || c == '@' || 75 c == ',' || c == ';' || c == ':' || c == '\\' || c == '\"' || 76 c == '.' || c == '[' || c == ']'); 77 } 78 79 /* qtext = <any CHAR excepting <">, ; => may be folded 80 * "\" & CR, and including 81 * linear-white-space> 82 */ 83 static int is_qtext(char c) 84 { 85 return (c != '\"' && c != '\\' && c != '\n'); 86 } 87 88 /* ctext = <any CHAR excluding "(", ; => may be folded 89 * ")", "\" & CR, & including 90 * linear-white-space> 91 */ 92 static int is_ctext(char c) 93 { 94 return (c != '(' && c != ')' && c != '\\' && c != '\n'); 95 } 96 97 /* word = atom / quoted-string 98 */ 99 static int word(BUFFER *in, BUFFER *word, BUFFER *x) 100 { 101 return (atom(in, word, x) || quoted_string(in, word, x)); 102 } 103 104 /* atom = 1*<any CHAR except specials, SPACE and CTLs> 105 */ 106 static int atom(BUFFER *in, BUFFER *atom, BUFFER *x) 107 { 108 int c; 109 110 buf_clear(atom); 111 wsc(in, x); 112 for (;;) { 113 c = buf_getc(in); 114 if (c == -1) 115 break; 116 else if (is_specials(c) || c == ' ' || c < 32 || c == 127) { 117 buf_ungetc(in); 118 break; 119 } else 120 buf_appendc(atom, c); 121 } 122 if (atom->length) 123 wsc(in, x); 124 return (atom->length); 125 } 126 127 /* quoted-string = <"> *(qtext/quoted-pair) <">; Regular qtext or 128 * ; quoted chars. 129 */ 130 static int quoted_string(BUFFER *in, BUFFER *string, BUFFER *x) 131 { 132 int ptr, xlen; 133 int c; 134 135 ptr = in->ptr, xlen = x ? x->length : 0; 136 buf_clear(string); 137 wsc(in, NULL); 138 c = buf_getc(in); 139 if (c == '\"') { 140 #if 0 141 buf_appendc(string, c); 142 #endif 143 for (;;) { 144 c = buf_getc(in); 145 if (c == -1) /* catch unterminated quoted string */ 146 break; 147 if (is_qtext(c)) 148 buf_appendc(string, c); 149 else if (c == '\n') { 150 c = buf_getc(in); 151 if (c != ' ' && c != '\n') 152 break; 153 } else if (c == '\\') { 154 c = buf_getc(in); 155 if (c == -1) 156 break; 157 else 158 buf_appendc(string, c); 159 } else if (c == '\"') { 160 #if 0 161 buf_appendc(string, c); 162 #endif 163 wsc(in, NULL); 164 return (1); 165 } else 166 break; 167 } 168 } 169 in->ptr = ptr, backtrack(x, xlen); 170 return (0); 171 } 172 173 /* comment = "(" *(ctext / quoted-pair / comment) ")" 174 */ 175 static int comment(BUFFER *in, BUFFER *string) 176 { 177 int ptr, xlen; 178 int separator = 0; 179 int c; 180 181 ptr = in->ptr; 182 xlen = string ? string->length : 0; 183 if (xlen) 184 separator = 1; 185 c = buf_getc(in); 186 if (c == '(') { 187 for (;;) { 188 c = buf_getc(in); 189 if (c == -1) 190 return(1); /* unterminated comment, bail out */ 191 if (is_ctext(c)) { 192 if (string != NULL) { 193 if (separator) 194 buf_appendc(string, ' '), separator = 0; 195 buf_appendc(string, c); 196 } 197 } else if (c == '\n') { 198 c = buf_getc(in); 199 if (c != ' ' && c != '\n') 200 break; 201 } else if (c == '\\') { 202 c = buf_getc(in); 203 if (c != -1) { 204 if (string != NULL) { 205 if (separator) 206 buf_appendc(string, ' '), separator = 0; 207 buf_appendc(string, c); 208 } 209 } 210 } else if (c == ')') 211 return (1); 212 else { 213 BUFFER *s; 214 int o; 215 216 s = buf_new(); 217 buf_ungetc(in); 218 o = comment(in, s); 219 if (o && string != NULL) { 220 if (separator) 221 buf_appendc(string, ' '), separator = 0; 222 buf_cat(string, s); 223 } 224 buf_free(s); 225 if (!o) 226 break; 227 } 228 } 229 } 230 in->ptr = ptr; 231 backtrack(string, xlen); 232 return (0); 233 } 234 235 /* local-part = word *("." word) ; uninterpreted 236 * ; case-preserved 237 */ 238 static int local_part(BUFFER *in, BUFFER *addr, BUFFER *x) 239 { 240 BUFFER *w; 241 int c; 242 243 buf_clear(addr); 244 if (!word(in, addr, x)) 245 return (0); 246 w = buf_new(); 247 for (;;) { 248 c = buf_getc(in); 249 if (c == -1) 250 break; 251 if (c == '.' && (word(in, w, x))) 252 buf_appendc(addr, '.'), buf_cat(addr, w); 253 else { 254 buf_ungetc(in); 255 break; 256 } 257 } 258 buf_free(w); 259 return (addr->length); 260 } 261 262 /* domain = sub-domain *("." sub-domain) 263 */ 264 static int domain(BUFFER *in, BUFFER *domain, BUFFER *x) 265 { 266 BUFFER *sub; 267 int c; 268 269 if (!sub_domain(in, domain, x)) 270 return (0); 271 sub = buf_new(); 272 for (;;) { 273 c = buf_getc(in); 274 if (c == -1) 275 break; 276 if (c == '.' && (sub_domain(in, sub, x))) 277 buf_appendc(domain, '.'), buf_cat(domain, sub); 278 else { 279 buf_ungetc(in); 280 break; 281 } 282 } 283 buf_free(sub); 284 return (domain->length); 285 } 286 287 /* sub-domain = domain-ref / domain-literal 288 */ 289 static int sub_domain(BUFFER *in, BUFFER *sub, BUFFER *x) 290 { 291 return (domain_ref(in, sub, x) || domain_literal(in, sub, x)); 292 } 293 294 /* domain-ref = atom ; symbolic reference 295 */ 296 static int domain_ref(BUFFER *in, BUFFER *d, BUFFER *x) 297 { 298 return (atom(in, d, x)); 299 } 300 301 /* addr-spec = local-part "@" domain ; global address 302 */ 303 static int addr_spec(BUFFER *in, BUFFER *addr, BUFFER *x) 304 { 305 BUFFER *dom; 306 int ptr, xlen; 307 308 ptr = in->ptr, xlen = x ? x->length : 0; 309 dom = buf_new(); 310 buf_clear(addr); 311 if (local_part(in, addr, x) && buf_getc(in) == '@' && domain(in, dom, x)) 312 buf_appendc(addr, '@'), buf_cat(addr, dom); 313 else 314 buf_clear(addr), in->ptr = ptr, backtrack(x, xlen); 315 buf_free(dom); 316 return (addr->length); 317 } 318 319 /* route-addr = "<" [route] addr-spec ">" 320 */ 321 static int route_addr(BUFFER *in, BUFFER *addr, BUFFER *x) 322 { 323 int c; 324 int ptr, xlen; 325 326 ptr = in->ptr, xlen = x ? x->length : 0; 327 c = buf_getc(in); 328 if (c == -1) 329 return (0); 330 if (c != '<') { 331 buf_ungetc(in); 332 return (0); 333 } 334 if (addr_spec(in, addr, x) && buf_getc(in) == '>') 335 return (1); 336 in->ptr = ptr, backtrack(x, xlen); 337 return (0); 338 } 339 340 /* phrase = 1*word ; Sequence of words 341 */ 342 static int phrase(BUFFER *in, BUFFER *phr, BUFFER *x) 343 { 344 BUFFER *w; 345 346 buf_clear(phr); 347 w = buf_new(); 348 while (word(in, w, x)) { 349 if (phr->length) 350 buf_appendc(phr, ' '); 351 buf_cat(phr, w); 352 } 353 buf_free(w); 354 return (phr->length); 355 } 356 357 /* mailbox = addr-spec ; simple address 358 * / [phrase] route-addr ; name & addr-spec 359 * (RFC 1123) 360 */ 361 static int mailbox(BUFFER *in, BUFFER *mailbox, BUFFER *name, BUFFER *x) 362 { 363 int ptr, xlen, ret; 364 365 buf_clear(name); 366 if (addr_spec(in, mailbox, x)) 367 return (1); 368 369 ptr = in->ptr, xlen = x ? x->length : 0; 370 ret = phrase(in, name, x) && route_addr(in, mailbox, x); 371 if (!ret) { 372 in->ptr = ptr, backtrack(x, xlen); 373 ret = route_addr(in, mailbox, x); 374 } 375 376 return (ret); 377 } 378 379 /* address = mailbox ; one addressee 380 * / group ; named list 381 */ 382 static int address(BUFFER *in, BUFFER *address, BUFFER *name, BUFFER *x) 383 { 384 return (mailbox(in, address, name, x) || group(in, address, name, x)); 385 } 386 387 /* group = phrase ":" [#mailbox] ";" 388 */ 389 static int group(BUFFER *in, BUFFER *group, BUFFER *name, BUFFER *x) 390 { 391 BUFFER *addr, *tmp; 392 int ptr, xlen, ret = 0; 393 394 ptr = in->ptr, xlen = x ? x->length : 0; 395 addr = buf_new(); 396 tmp = buf_new(); 397 buf_clear(group); 398 if (phrase(in, name, x) && buf_getc(in) == ':') { 399 while (mailbox(in, addr, tmp, x)) 400 buf_cat(group, addr), buf_nl(group); 401 ret = buf_getc(in) == ';'; 402 } 403 if (!ret) 404 in->ptr = ptr, backtrack(x, xlen); 405 buf_free(addr); 406 buf_free(tmp); 407 return (ret); 408 } 409 410 /* domain-literal = "[" *(dtext / quoted-pair) "]" 411 */ 412 static int domain_literal(BUFFER *in, BUFFER *dom, BUFFER *x) 413 { 414 return 0; /* XXX */ 415 } 416 417 /* local address without `@' is not specified in RFC 822 */ 418 419 /* local_addr = "<" atom ">" */ 420 static int local_addr(BUFFER *in, BUFFER *addr, BUFFER *x) 421 { 422 int c; 423 int ptr, xlen; 424 425 ptr = in->ptr, xlen = x ? x->length : 0; 426 c = buf_getc(in); 427 if (c == -1) 428 return (0); 429 if (c != '<') { 430 buf_ungetc(in); 431 return (0); 432 } 433 if (atom(in, addr, x) && buf_getc(in) == '>') 434 return (1); 435 in->ptr = ptr, backtrack(x, xlen); 436 return (0); 437 } 438 439 static int localaddress(BUFFER *in, BUFFER *address, BUFFER *name, BUFFER *x) 440 { 441 int ptr, xlen; 442 443 buf_clear(name); 444 if (local_addr(in, address, x)) 445 return (1); 446 ptr = in->ptr, xlen = x ? x->length : 0; 447 if (phrase(in, name, x) && local_addr(in, address, x)) 448 return (1); 449 in->ptr = ptr, backtrack(x, xlen); 450 buf_clear(name); 451 return (atom(in, address, x)); 452 } 453 454 void rfc822_addr(BUFFER *destination, BUFFER *list) 455 { 456 BUFFER *addr, *name; 457 458 addr = buf_new(); 459 name = buf_new(); 460 461 for (;;) { 462 if (!address(destination, addr, name, NULL) && 463 !localaddress(destination, addr, name, NULL)) 464 break; 465 buf_cat(list, addr); 466 buf_nl(list); 467 if (buf_getc(destination) != ',') 468 break; 469 } 470 buf_free(addr); 471 buf_free(name); 472 } 473 474 void rfc822_name(BUFFER *line, BUFFER *name) 475 { 476 BUFFER *addr, *comment; 477 int ret; 478 479 addr = buf_new(); 480 comment = buf_new(); 481 ret = address(line, addr, name, comment); 482 if (ret == 0) 483 ret = localaddress(line, addr, name, comment); 484 if (ret) { 485 if (name->length == 0) 486 buf_set(name, comment); 487 if (name->length == 0) 488 buf_set(name, addr); 489 } 490 if (ret == 0) 491 buf_set(name, line); 492 buf_free(addr); 493 buf_free(comment); 494 } 495 496 /* MIME extensions. RFC 2045 */ 497 498 /* tspecials := "(" / ")" / "<" / ">" / "@" / 499 * "," / ";" / ":" / "\" / <"> 500 * "/" / "[" / "]" / "?" / "=" 501 */ 502 503 static int is_tspecials(int c) 504 { 505 return (c == '(' || c == ')' || c == '<' || c == '>' || c == '@' || 506 c == ',' || c == ';' || c == ':' || c == '\\' || c == '\"' || 507 c == '/' || c == '[' || c == ']' || c == '?' || c == '='); 508 } 509 510 /* token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, 511 * or tspecials> 512 */ 513 static int token(BUFFER *in, BUFFER *token, BUFFER *x) 514 { 515 int c; 516 517 buf_clear(token); 518 wsc(in, x); 519 for (;;) { 520 c = buf_getc(in); 521 if (c == -1) 522 break; 523 else if (is_tspecials(c) || c == ' ' || c < 32 || c == 127) { 524 buf_ungetc(in); 525 break; 526 } else 527 buf_appendc(token, c); 528 } 529 if (token->length) 530 wsc(in, x); 531 return (token->length); 532 } 533 534 /* value := token / quoted-string 535 */ 536 537 static int value(BUFFER *in, BUFFER *value, BUFFER *x) 538 { 539 return (token(in, value, x) || quoted_string(in, value, x)); 540 } 541 542 /* parameter := attribute "=" value 543 */ 544 545 static int parameter(BUFFER *in, BUFFER *attribute, BUFFER *val, BUFFER *x) 546 { 547 int ptr; 548 ptr = in->ptr; 549 token(in, attribute, x); 550 if (buf_getc(in) != '=') { 551 in->ptr = ptr; 552 return(0); 553 } 554 return(value(in, val, x)); 555 } 556 557 /* get type */ 558 int get_type(BUFFER *content, BUFFER *type, BUFFER *subtype) 559 { 560 token(content, type, NULL); 561 if (buf_getc(content) == '/') 562 return (token(content, subtype, NULL)); 563 buf_ungetc(content); 564 buf_clear(type); 565 return (0); 566 } 567 568 /* get parameter value */ 569 void get_parameter(BUFFER *content, char *attribute, BUFFER *value) 570 { 571 BUFFER *tok; 572 tok = buf_new(); 573 buf_clear(value); 574 575 get_type(content, tok, tok); 576 for (;;) { 577 if (buf_getc(content) != ';') 578 break; 579 if (parameter(content, tok, value, NULL) && 580 strieq(attribute, tok->data)) 581 break; /* found */ 582 buf_clear(value); 583 } 584 buf_free(tok); 585 }