blob: aed01cf32475c657c0ff3892261f0b1d6ab900b4 [file] [log] [blame]
/* Quick and dirty RFC 2047 decoding: quoted-printable/base64 text plus charset conversion via tcs. */
#include "a.h"
/*
 * Map a single hexadecimal digit character to its value (0-15).
 * Any character that is not a hex digit yields 15.
 */
static int
unhex1(char c)
{
	int v;

	v = 15;	/* result for anything that is not a hex digit */
	if(c >= '0' && c <= '9')
		v = c - '0';
	else if(c >= 'a' && c <= 'f')
		v = 10 + (c - 'a');
	else if(c >= 'A' && c <= 'F')
		v = 10 + (c - 'A');
	return v;
}
/*
 * Combine the two hex digits s[0] (high nibble) and s[1] (low nibble)
 * into one value.  The caller must guarantee both bytes are readable.
 */
static int
unhex(char *s)
{
	int hi, lo;

	hi = unhex1(s[0]);
	lo = unhex1(s[1]);
	return hi*16 + lo;
}
/*
 * Decode n bytes of quoted-printable text starting at in, storing at
 * most lim bytes at out.  Returns the number of bytes stored.
 *
 * If underscores is non-zero, '_' decodes to a space (the RFC 2047
 * "Q" header variant).  "=XX" decodes to the byte with hex value XX.
 * A soft line break, "=" followed by "\n" or by "\r\n", produces no
 * output.  Decoding stops early when the output is full or when the
 * input ends in the middle of an "=" escape; the partial escape is
 * dropped.  No terminating NUL is written.
 */
int
_decqp(uchar *out, int lim, char *in, int n, int underscores)
{
	char *p, *ep;
	uchar *eout, *out0;

	out0 = out;
	eout = out+lim;
	for(p=in, ep=in+n; p<ep && out<eout; ){
		if(underscores && *p == '_'){
			*out++ = ' ';
			p++;
		}
		else if(*p == '='){
			if(p+1 >= ep)
				break;
			if(*(p+1) == '\n'){
				/* soft line break with a bare newline */
				p += 2;
				continue;
			}
			if(*(p+1) == '\r' && p+2 < ep && *(p+2) == '\n'){
				/*
				 * soft line break with CRLF; without this case the
				 * "\r\n" pair would be misread as two hex digits.
				 */
				p += 3;
				continue;
			}
			if(p+3 > ep)
				break;
			*out++ = unhex(p+1);
			p += 3;
		}else
			*out++ = *p++;
	}
	return out-out0;
}
/*
 * Quoted-printable decode without the underscore-to-space rule:
 * forwards to _decqp with underscores disabled and returns its count.
 */
int
decqp(uchar *out, int lim, char *in, int n)
{
	int nout;

	nout = _decqp(out, lim, in, n, 0);
	return nout;
}
/*
 * Decode the NUL-terminated string s according to kind and return a
 * newly allocated, NUL-terminated result; the decoded length (not
 * counting the NUL) is stored in *len.
 *
 * QuotedPrintable and QuotedPrintableU go through _decqp (the U
 * variant also maps '_' to space); Base64 goes through dec64; any
 * other kind yields a plain copy of s.
 *
 * If s is nil, nil is returned and *len is left untouched.
 * The input string is not modified or freed.
 */
char*
decode(int kind, char *s, int *len)
{
	char *t;
	int l, n;

	if(s == nil)
		return s;
	switch(kind){
	case QuotedPrintable:
	case QuotedPrintableU:
	case Base64:
		/* decoded output is never longer than the input; +1 for the NUL */
		n = strlen(s);
		t = emalloc(n+1);
		if(kind == Base64)
			l = dec64((uchar*)t, n+1, s, n);
		else
			l = _decqp((uchar*)t, n+1, s, n, kind==QuotedPrintableU);
		/*
		 * Never index with a negative count: treat a failed decode
		 * as an empty result.  NOTE(review): dec64 is documented as
		 * able to return -1 on failure; confirm against the library
		 * in use.
		 */
		if(l < 0)
			l = 0;
		*len = l;
		t[l] = 0;
		return t;
	default:
		*len = strlen(s);
		return estrdup(s);
	}
}
/*
 * Charset name translation table used by tcs():
 * mime is the name compared (case-insensitively) against the charset
 * of the text, tcs is the name handed to the "tcs -f" command.
 * Charsets not listed here are handled by tcs() as Latin-1.
 */
struct {
	char *mime;	/* charset name as it appears in the message */
	char *tcs;	/* corresponding argument for tcs -f */
} tcstab[] = {
	{ "iso-8859-2",		"8859-2" },
	{ "iso-8859-3",		"8859-3" },
	{ "iso-8859-4",		"8859-4" },
	{ "iso-8859-5",		"8859-5" },
	{ "iso-8859-6",		"8859-6" },
	{ "iso-8859-7",		"8859-7" },
	{ "iso-8859-8",		"8859-8" },
	{ "iso-8859-9",		"8859-9" },
	{ "iso-8859-10",	"8859-10" },
	{ "iso-8859-15",	"8859-15" },
	{ "big5",		"big5" },
	{ "iso-2022-jp",	"jis-kanji" },
	{ "windows-1250",	"windows-1250" },
	{ "windows-1251",	"windows-1251" },
	{ "windows-1252",	"windows-1252" },
	{ "windows-1253",	"windows-1253" },
	{ "windows-1254",	"windows-1254" },
	{ "windows-1255",	"windows-1255" },
	{ "windows-1256",	"windows-1256" },
	{ "windows-1257",	"windows-1257" },
	{ "windows-1258",	"windows-1258" },
	{ "koi8-r",		"koi8" }
};
/*
 * Argument bundle for twriter: the text to write and where to write it.
 * twriter frees both s and the structure itself when it is done.
 */
typedef struct Writeargs {
	int fd;		/* descriptor to write to; closed by twriter */
	char *s;	/* NUL-terminated text to write; freed by twriter */
} Writeargs;
/*
 * Proc entry point: v is a Writeargs*.  Writes the whole string to the
 * descriptor, closes the descriptor, then releases the string and the
 * argument structure.  The result of write is not checked.
 */
static void
twriter(void *v)
{
	Writeargs *args;
	char *text;
	int out;

	args = v;
	text = args->s;
	out = args->fd;

	write(out, text, strlen(text));
	close(out);

	free(text);
	free(args);
}
/*
 * Re-encode s as UTF-8, treating every byte as one Latin-1 code point.
 * Frees s and returns a newly allocated string.
 */
static char*
latin1toutf(char *s)
{
	int n;
	uchar *us;
	char *t, *u;
	Rune r;

	/* first pass: size of the UTF-8 form */
	n = 0;
	for(us=(uchar*)s; *us; us++)
		n += runelen(*us);
	n++;	/* terminating NUL */
	t = emalloc(n);
	for(us=(uchar*)s, u=t; *us; us++){
		r = *us;
		u += runetochar(u, &r);
	}
	*u = 0;
	free(s);
	return t;
}

/*
 * Convert s from the named charset to UTF-8.
 *
 * Ownership: s is consumed.  The return value is either s itself
 * (nil or empty s, nil charset, or charset "utf-8") or a newly
 * allocated string, in which case s has been freed.
 *
 * "iso-8859-1" and "us-ascii" are converted in-process.  Charsets
 * listed in tcstab are converted by running "tcs -f name": a separate
 * proc (twriter) feeds a copy of s into one pipe while this function
 * reads the converted text from the other.  Unknown charsets, a failed
 * spawn, or an empty/failed read all fall back to the Latin-1 conversion.
 *
 * Calls sysfatal if the pipes cannot be created.
 */
char*
tcs(char *charset, char *s)
{
	char *buf;
	int i, n, nbuf;
	int fd[3], p[2], pp[2];
	Writeargs *w;

	if(s == nil || charset == nil || *s == 0)
		return s;
	if(cistrcmp(charset, "utf-8") == 0)
		return s;
	if(cistrcmp(charset, "iso-8859-1") == 0 || cistrcmp(charset, "us-ascii") == 0)
		return latin1toutf(s);

	for(i=0; i<nelem(tcstab); i++)
		if(cistrcmp(charset, tcstab[i].mime) == 0)
			break;
	if(i >= nelem(tcstab))
		return latin1toutf(s);	/* unknown charset */

	if(pipe(p) < 0 || pipe(pp) < 0)
		sysfatal("pipe: %r");
	/* presumably fd[0..2] become the child's stdin, stdout, stderr */
	fd[0] = p[0];
	fd[1] = pp[0];
	fd[2] = dup(2, -1);
	if(threadspawnl(fd, "tcs", "tcs", "-f", tcstab[i].tcs, nil) < 0){
		close(p[0]);
		close(p[1]);
		close(pp[0]);
		close(pp[1]);
		close(fd[2]);
		return latin1toutf(s);
	}
	/*
	 * NOTE(review): fd[2] is not closed on this path while p[0] and
	 * pp[0] are.  Confirm against thread(3) whether threadspawnl
	 * already closes fd[] in the parent on success (making these
	 * closes redundant) or not (leaking fd[2]).
	 */
	close(p[0]);
	close(pp[0]);

	nbuf = UTFmax*strlen(s)+100;	/* just a guess at worst case */
	buf = emalloc(nbuf);

	/* the writer proc owns w and its copy of s, and closes p[1] */
	w = emalloc(sizeof *w);
	w->fd = p[1];
	w->s = estrdup(s);
	proccreate(twriter, w, STACK);

	n = readn(pp[1], buf, nbuf-1);
	close(pp[1]);
	if(n <= 0){
		free(buf);
		return latin1toutf(s);
	}
	buf[n] = 0;
	free(s);
	s = estrdup(buf);	/* trim the result to its actual size */
	free(buf);
	return s;
}
/*
 * Expand RFC 2047 encoded words ("=?charset?e?text?=") in s, where e
 * is q/Q (quoted-printable with '_' as space) or b/B (base64).  Each
 * decoded word is converted to UTF-8 with tcs(); everything else is
 * copied through rune by rune.
 *
 * Ownership: s is consumed.  If s is nil, or contains no "=?", s itself
 * is returned.  Otherwise s is scribbled on, freed, and a newly
 * allocated string is returned.  Calls sysfatal if the output cannot
 * be allocated.
 *
 * Text that starts with "=?" but is not a well-formed encoded word is
 * copied through unchanged.
 */
char*
unrfc2047(char *s)
{
	char *p, *q, *t, *u, *v, *cs;
	int len;
	Rune r;
	Fmt fmt;

	if(s == nil)
		return nil;
	if(strstr(s, "=?") == nil)
		return s;

	fmtstrinit(&fmt);
	for(p=s; *p; ){
		/* =?charset?e?text?= */
		if(*p=='=' && *(p+1)=='?'){
			/*
			 * p is left at the '=' until the whole word has been
			 * validated, so a malformed word is emitted verbatim
			 * instead of losing its "=?" prefix.
			 */
			cs = p+2;			/* charset name */
			q = strchr(cs, '?');
			if(q == nil)
				goto emit;
			q++;				/* encoding letter */
			/* check for end of string before looking at q[1] */
			if(*q == 0 || *q == '?' || *(q+1) != '?')
				goto emit;
			t = q+2;			/* encoded text */
			u = strchr(t, '?');		/* closing "?=" */
			if(u == nil || *(u+1) != '=')
				goto emit;
			switch(*q){
			case 'q':
			case 'Q':
				*u = 0;
				v = decode(QuotedPrintableU, t, &len);
				break;
			case 'b':
			case 'B':
				*u = 0;
				v = decode(Base64, t, &len);
				break;
			default:
				goto emit;
			}
			*(q-1) = 0;			/* terminate charset name */
			v = tcs(cs, v);
			fmtstrcpy(&fmt, v);
			free(v);
			p = u+2;
			/*
			 * Go back to the loop test: p may now be at the
			 * terminating NUL, which must not be consumed below.
			 */
			continue;
		}
	emit:
		p += chartorune(&r, p);
		fmtrune(&fmt, r);
	}
	p = fmtstrflush(&fmt);
	if(p == nil)
		sysfatal("out of memory");
	free(s);
	return p;
}
#ifdef TEST
/* Sample inputs fed to unrfc2047 by the TEST driver. */
char *test[] = {
	/* no "=?" at all */
	"hello world",

	/* Q-encoded words */
	"hello =?iso-8859-1?q?this is some text?=",
	"=?US-ASCII?Q?Keith_Moore?=",
	"=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=",
	"=?ISO-8859-1?Q?Andr=E9?= Pirard",

	/* B-encoded words */
	"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=",
	"=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",

	"=?ISO-8859-1?Q?Olle_J=E4rnefors?=",
	"=?iso-2022-jp?B?GyRCTTVKISRKP006SiRyS34kPyQ3JEZKcz03JCIkahsoQg==?=",
	"=?UTF-8?B?Ik5pbHMgTy4gU2Vsw6VzZGFsIg==?=",
};
/*
 * TEST driver: for every sample in test[], print the raw string and,
 * on an indented line below it, the result of unrfc2047 applied to a
 * private copy.  argc and argv are not used.
 */
void
threadmain(int argc, char **argv)
{
	int i;
	char *raw, *cooked;

	for(i=0; i<nelem(test); i++){
		raw = test[i];
		/* unrfc2047 consumes its argument, so hand it a copy */
		cooked = unrfc2047(estrdup(raw));
		print("%s\n\t%s\n", raw, cooked);
	}
	threadexitsall(0);
}
#endif