96#define ICONV_CACHE_SIZE 16
123 {
"ansi_x3.4-1968",
"us-ascii" },
124 {
"iso-ir-6",
"us-ascii" },
125 {
"iso_646.irv:1991",
"us-ascii" },
126 {
"ascii",
"us-ascii" },
127 {
"iso646-us",
"us-ascii" },
128 {
"us",
"us-ascii" },
129 {
"ibm367",
"us-ascii" },
130 {
"cp367",
"us-ascii" },
131 {
"csASCII",
"us-ascii" },
133 {
"csISO2022KR",
"iso-2022-kr" },
134 {
"csEUCKR",
"euc-kr" },
135 {
"csISO2022JP",
"iso-2022-jp" },
136 {
"csISO2022JP2",
"iso-2022-jp-2" },
138 {
"ISO_8859-1:1987",
"iso-8859-1" },
139 {
"iso-ir-100",
"iso-8859-1" },
140 {
"iso_8859-1",
"iso-8859-1" },
141 {
"latin1",
"iso-8859-1" },
142 {
"l1",
"iso-8859-1" },
143 {
"IBM819",
"iso-8859-1" },
144 {
"CP819",
"iso-8859-1" },
145 {
"csISOLatin1",
"iso-8859-1" },
147 {
"ISO_8859-2:1987",
"iso-8859-2" },
148 {
"iso-ir-101",
"iso-8859-2" },
149 {
"iso_8859-2",
"iso-8859-2" },
150 {
"latin2",
"iso-8859-2" },
151 {
"l2",
"iso-8859-2" },
152 {
"csISOLatin2",
"iso-8859-2" },
154 {
"ISO_8859-3:1988",
"iso-8859-3" },
155 {
"iso-ir-109",
"iso-8859-3" },
156 {
"ISO_8859-3",
"iso-8859-3" },
157 {
"latin3",
"iso-8859-3" },
158 {
"l3",
"iso-8859-3" },
159 {
"csISOLatin3",
"iso-8859-3" },
161 {
"ISO_8859-4:1988",
"iso-8859-4" },
162 {
"iso-ir-110",
"iso-8859-4" },
163 {
"ISO_8859-4",
"iso-8859-4" },
164 {
"latin4",
"iso-8859-4" },
165 {
"l4",
"iso-8859-4" },
166 {
"csISOLatin4",
"iso-8859-4" },
168 {
"ISO_8859-6:1987",
"iso-8859-6" },
169 {
"iso-ir-127",
"iso-8859-6" },
170 {
"iso_8859-6",
"iso-8859-6" },
171 {
"ECMA-114",
"iso-8859-6" },
172 {
"ASMO-708",
"iso-8859-6" },
173 {
"arabic",
"iso-8859-6" },
174 {
"csISOLatinArabic",
"iso-8859-6" },
176 {
"ISO_8859-7:1987",
"iso-8859-7" },
177 {
"iso-ir-126",
"iso-8859-7" },
178 {
"ISO_8859-7",
"iso-8859-7" },
179 {
"ELOT_928",
"iso-8859-7" },
180 {
"ECMA-118",
"iso-8859-7" },
181 {
"greek",
"iso-8859-7" },
182 {
"greek8",
"iso-8859-7" },
183 {
"csISOLatinGreek",
"iso-8859-7" },
185 {
"ISO_8859-8:1988",
"iso-8859-8" },
186 {
"iso-ir-138",
"iso-8859-8" },
187 {
"ISO_8859-8",
"iso-8859-8" },
188 {
"hebrew",
"iso-8859-8" },
189 {
"csISOLatinHebrew",
"iso-8859-8" },
191 {
"ISO_8859-5:1988",
"iso-8859-5" },
192 {
"iso-ir-144",
"iso-8859-5" },
193 {
"ISO_8859-5",
"iso-8859-5" },
194 {
"cyrillic",
"iso-8859-5" },
195 {
"csISOLatinCyrillic",
"iso-8859-5" },
197 {
"ISO_8859-9:1989",
"iso-8859-9" },
198 {
"iso-ir-148",
"iso-8859-9" },
199 {
"ISO_8859-9",
"iso-8859-9" },
200 {
"latin5",
"iso-8859-9" },
201 {
"l5",
"iso-8859-9" },
202 {
"csISOLatin5",
"iso-8859-9" },
204 {
"ISO_8859-10:1992",
"iso-8859-10" },
205 {
"iso-ir-157",
"iso-8859-10" },
206 {
"latin6",
"iso-8859-10" },
207 {
"l6",
"iso-8859-10" },
208 {
"csISOLatin6",
"iso-8859-10" },
210 {
"csKOI8r",
"koi8-r" },
212 {
"MS_Kanji",
"Shift_JIS" },
213 {
"csShiftJis",
"Shift_JIS" },
215 {
"Extended_UNIX_Code_Packed_Format_for_Japanese",
217 {
"csEUCPkdFmtJapanese",
"euc-jp" },
219 {
"csGB2312",
"gb2312" },
220 {
"csbig5",
"big5" },
225 {
"iso_8859-13",
"iso-8859-13" },
226 {
"iso-ir-179",
"iso-8859-13" },
227 {
"latin7",
"iso-8859-13" },
228 {
"l7",
"iso-8859-13" },
230 {
"iso_8859-14",
"iso-8859-14" },
231 {
"latin8",
"iso-8859-14" },
232 {
"l8",
"iso-8859-14" },
234 {
"iso_8859-15",
"iso-8859-15" },
235 {
"latin9",
"iso-8859-15" },
238 {
"latin0",
"iso-8859-15" },
240 {
"iso_8859-16",
"iso-8859-16" },
241 {
"latin10",
"iso-8859-16" },
243 {
"646",
"us-ascii" },
247 {
"eucJP",
"euc-jp" },
248 {
"PCK",
"Shift_JIS" },
249 {
"ko_KR-euc",
"euc-kr" },
250 {
"zh_TW-big5",
"big5" },
254 {
"sjis",
"Shift_JIS" },
255 {
"euc-jp-ms",
"eucJP-ms" },
332 const char *charset,
char **ps)
345 char const *c = np->
data;
379 char in[1024] = { 0 };
380 char scratch[1024 + 10] = { 0 };
384 char *ext = strchr(in,
'/');
397 snprintf(scratch,
sizeof(scratch),
"iso-8859-%s", in + plen);
399 snprintf(scratch,
sizeof(scratch),
"iso-8859-%s", in + plen);
401 snprintf(scratch,
sizeof(scratch),
"iso_8859-%s", in + plen);
403 snprintf(scratch,
sizeof(scratch),
"iso_8859-%s", in + plen);
420 if (ext && (*ext !=
'\0'))
447 char buf[256] = { 0 };
455 ((len1 > len2) ? cs2 : buf),
MIN(len1, len2));
467 static char fcharset[128];
468 const char *c = NULL;
470 if (assumed_charset && (assumed_charset->
count > 0))
488 char buf[1024] = { 0 };
510 const char *replace,
struct Buffer *err)
512 if (!pat || !replace)
516 int rc =
REG_COMP(rx, pat, REG_ICASE);
544 struct Lookup *tmp = NULL;
596 char tocode1[128] = { 0 };
597 char fromcode1[128] = { 0 };
598 const char *tocode2 = NULL, *fromcode2 = NULL;
599 const char *tmp = NULL;
618 if (strcmp(tocode1,
IconvCache[i].tocode1) == 0 &&
619 strcmp(fromcode1,
IconvCache[i].fromcode1) == 0)
625 for (
int j = i; j-- > 0;)
634 iconv(
cd, NULL, NULL, NULL, NULL);
643 tocode2 = tocode2 ? tocode2 :
tocode1;
645 fromcode2 = fromcode2 ? fromcode2 :
fromcode1;
648 iconv_t
cd = iconv_open(tocode2, fromcode2);
698 char **outbuf,
size_t *outbytesleft,
const char **inrepls,
699 const char *outrepl,
int *iconverrno)
702 const char *ib = *inbuf;
703 size_t ibl = *inbytesleft;
705 size_t obl = *outbytesleft;
710 const size_t ret1 = iconv(
cd, (ICONV_CONST
char **) &ib, &ibl, &ob, &obl);
716 if (ibl && obl && (errno ==
EILSEQ))
721 const char **t = NULL;
722 for (t = inrepls; *t; t++)
724 const char *ib1 = *t;
725 size_t ibl1 = strlen(*t);
728 iconv(
cd, (ICONV_CONST
char **) &ib1, &ibl1, &ob1, &obl1);
745 iconv(
cd, NULL, NULL, &ob, &obl);
748 int n = strlen(outrepl);
754 memcpy(ob, outrepl, n);
760 iconv(
cd, NULL, NULL, NULL, NULL);
796int mutt_ch_check(
const char *s,
size_t slen,
const char *from,
const char *to)
798 if (!s || !from || !to)
806 size_t outlen = MB_LEN_MAX * slen;
808 char *saved_out = out;
810 const size_t convlen = iconv(
cd, (ICONV_CONST
char **) &s, &slen, &out, &outlen);
838 if (!s || (*s ==
'\0'))
844 const char *repls[] = {
"\357\277\275",
"?", 0 };
851 const char **inrepls = NULL;
852 const char *outrepl = NULL;
855 outrepl =
"\357\277\275";
862 size_t ibl = strlen(s);
863 if (ibl >= (SIZE_MAX / MB_LEN_MAX))
867 size_t obl = MB_LEN_MAX * ibl;
872 iconv(
cd, 0, 0, &ob, &obl);
943 static const char *repls[] = {
"\357\277\275",
"?", 0 };
988 return fgetc(fc->
fp);
992 return (
unsigned char) *(fc->
p)++;
999 size_t obl =
sizeof(fc->
bufo);
1000 iconv(fc->
cd, (ICONV_CONST
char **) &fc->
ib, &fc->
ibl, &fc->
ob, &obl);
1002 return (
unsigned char) *(fc->
p)++;
1009 if ((fc->
ibl ==
sizeof(fc->
bufi)) ||
1023 size_t obl =
sizeof(fc->
bufo);
1027 return (
unsigned char) *(fc->
p)++;
1051 for (r = 0; (r + 1) < buflen;)
1056 buf[r++] = (char) c;
1080 char buf[256] = { 0 };
1095#if defined(HAVE_BIND_TEXTDOMAIN_CODESET) && defined(ENABLE_NLS)
1096 bind_textdomain_codeset(PACKAGE, buf);
1112 const char *u,
size_t ulen,
char **d,
size_t *dlen)
1114 if (!fromcode || !charsets)
1117 char *e = NULL, *tocode = NULL;
1118 size_t elen = 0, bestn = 0;
1139 if (!tocode || (n < bestn))
1168 char canonical_buf[1024] = { 0 };
size_t buf_addch(struct Buffer *buf, char c)
Add a single character to a Buffer.
size_t buf_addstr(struct Buffer *buf, const char *s)
Add a string to a Buffer.
size_t buf_strcpy(struct Buffer *buf, const char *s)
Copy a string into a Buffer.
void buf_lower(struct Buffer *buf)
Sets a buffer to lowercase.
General purpose object for storing and parsing strings.
static const char * buf_string(const struct Buffer *buf)
Convert a buffer to a const char * "string".
#define mutt_debug(LEVEL,...)
@ LL_DEBUG2
Log at debug level 2.
void * mutt_mem_calloc(size_t nmemb, size_t size)
Allocate zeroed memory on the heap.
void * mutt_mem_malloc(size_t size)
Allocate memory on the heap.
Memory management wrappers.
bool mutt_ch_check_charset(const char *cs, bool strict)
Does iconv understand a character set?
size_t mutt_ch_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, const char **inrepls, const char *outrepl, int *iconverrno)
Change the encoding of a string.
void mutt_ch_lookup_remove(void)
Remove all the character set lookups.
static int IconvCacheUsed
Number of iconv descriptors in the cache.
char * mutt_ch_choose(const char *fromcode, const struct Slist *charsets, const char *u, size_t ulen, char **d, size_t *dlen)
Figure the best charset to encode a string.
int mutt_ch_convert_nonmime_string(const struct Slist *const assumed_charset, const char *charset, char **ps)
Try to convert a string using a list of character sets.
static struct LookupList Lookups
Lookup table of preferred character set names.
char * mutt_ch_get_langinfo_charset(void)
Get the user's choice of character set.
static const struct MimeNames PreferredMimeNames[]
Lookup table of preferred charsets.
bool mutt_ch_lookup_add(enum LookupType type, const char *pat, const char *replace, struct Buffer *err)
Add a new character set lookup.
void mutt_ch_canonical_charset(char *buf, size_t buflen, const char *name)
Canonicalise the charset of a string.
void mutt_ch_cache_cleanup(void)
Clean up the cached iconv handles and charset strings.
const char * mutt_ch_iconv_lookup(const char *chs)
Look for a replacement character set.
int mutt_ch_convert_string(char **ps, const char *from, const char *to, uint8_t flags)
Convert a string between encodings.
void mutt_ch_set_charset(const char *charset)
Update the records for a new character set.
bool CharsetIsUtf8
Is the user's current character set utf-8?
static const char * lookup_charset(enum LookupType type, const char *cs)
Look for a preferred character set name.
int mutt_ch_check(const char *s, size_t slen, const char *from, const char *to)
Check whether a string can be converted between encodings.
const char * mutt_ch_charset_lookup(const char *chs)
Look for a replacement character set.
static struct Lookup * lookup_new(void)
Create a new Lookup.
int mutt_ch_fgetconv(struct FgetConv *fc)
Convert a file's character set.
#define ICONV_CACHE_SIZE
Max size of the iconv cache.
static void lookup_free(struct Lookup **ptr)
Free a Lookup.
wchar_t ReplacementChar
When a Unicode character can't be displayed, use this instead.
struct FgetConv * mutt_ch_fgetconv_open(FILE *fp, const char *from, const char *to, uint8_t flags)
Prepare a file for charset conversion.
static struct IconvCacheEntry IconvCache[ICONV_CACHE_SIZE]
Cache of iconv conversion descriptors.
char * mutt_ch_fgetconvs(char *buf, size_t buflen, struct FgetConv *fc)
Convert a file's charset into a string buffer.
bool mutt_ch_chscmp(const char *cs1, const char *cs2)
Are the names of two character sets equivalent?
void mutt_ch_fgetconv_close(struct FgetConv **ptr)
Close an fgetconv handle.
iconv_t mutt_ch_iconv_open(const char *tocode, const char *fromcode, uint8_t flags)
Set up iconv for conversions.
const char * mutt_ch_get_default_charset(const struct Slist *const assumed_charset)
Get the default character set.
Conversion between different character encodings.
#define MUTT_ICONV_HOOK_FROM
apply charset-hooks to fromcode
#define ICONV_T_INVALID
Error value for iconv functions.
#define mutt_ch_is_utf8(str)
LookupType
Types of character set lookups.
@ MUTT_LOOKUP_ICONV
Character set conversion.
@ MUTT_LOOKUP_CHARSET
Alias for another character set.
#define MUTT_ICONV_NO_FLAGS
No flags are set.
#define ICONV_ILLEGAL_SEQ
Error value for iconv() - Illegal sequence.
static bool iconv_t_valid(const iconv_t cd)
Is the conversion descriptor valid?
bool mutt_regex_match(const struct Regex *regex, const char *str)
Shorthand to mutt_regex_capture()
char * mutt_strn_dup(const char *begin, size_t len)
Duplicate a sub-string.
bool mutt_istr_equal(const char *a, const char *b)
Compare two strings, ignoring case.
char * mutt_str_dup(const char *str)
Copy a string, safely.
void mutt_str_adjust(char **ptr)
Shrink-to-fit a string.
size_t mutt_str_len(const char *a)
Calculate the length of a string, safely.
size_t mutt_str_copy(char *dest, const char *src, size_t dsize)
Copy a string into a buffer (guaranteeing NUL-termination)
size_t mutt_istr_startswith(const char *str, const char *prefix)
Check whether a string starts with a prefix, ignoring case.
bool mutt_istrn_equal(const char *a, const char *b, size_t num)
Check for equality of two strings ignoring case (to a maximum), safely.
char * mutt_str_replace(char **p, const char *s)
Replace one string with another.
struct Buffer * buf_pool_get(void)
Get a Buffer from the pool.
void buf_pool_release(struct Buffer **ptr)
Return a Buffer to the pool.
A global pool of Buffers.
#define TAILQ_FOREACH(var, head, field)
#define TAILQ_FOREACH_SAFE(var, head, field, tvar)
#define STAILQ_FIRST(head)
#define TAILQ_HEAD(name, type)
#define TAILQ_INSERT_TAIL(head, elm, field)
#define STAILQ_FOREACH(var, head, field)
#define TAILQ_REMOVE(head, elm, field)
#define TAILQ_HEAD_INITIALIZER(head)
#define TAILQ_ENTRY(type)
Manage regular expressions.
#define REG_COMP(preg, regex, cflags)
Compile a regular expression.
A separated list of strings.
String manipulation functions.
String manipulation buffer.
size_t dsize
Length of data.
char * data
Pointer to data.
Cursor for converting a file's encoding.
iconv_t cd
iconv conversion descriptor
Cached iconv conversion descriptor.
char * tocode1
Destination character set.
char * fromcode1
Source character set.
iconv_t cd
iconv conversion descriptor
Regex to String lookup table.
char * replacement
Alternative charset to use.
enum LookupType type
Lookup type.
struct Regex regex
Regular expression.
Cached regular expression.
char * pattern
printable version
regex_t * regex
compiled expression
struct ListHead head
List containing values.
size_t count
Number of values in list.