Skip to content

Commit e56ea97

Browse files
authored
Merge pull request #64 from unxed/oemcp
Use system locale to select OEM code page…
2 parents 36272c6 + c104127 commit e56ea97

File tree

1 file changed

+85
-0
lines changed

1 file changed

+85
-0
lines changed

CPP/7zip/Archive/Zip/ZipItem.cpp

Lines changed: 85 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,10 @@
11
// Archive/ZipItem.cpp
22

3+
#ifndef _WIN32
4+
#include <iconv.h>
5+
#include <locale.h>
6+
#endif
7+
38
#include "StdAfx.h"
49

510
#include "../../../../C/CpuArch.h"
@@ -350,6 +355,86 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo
350355
#endif
351356
}
352357

358+
#ifndef _WIN32
359+
// Convert OEM char set to UTF-8 if needed
360+
// Use system locale to select code page
361+
362+
Byte hostOS = GetHostOS();
363+
if (!isUtf8 && ((hostOS == NFileHeader::NHostOS::kFAT) || (hostOS == NFileHeader::NHostOS::kNTFS))) {
364+
365+
const char *oemcp;
366+
oemcp = getenv("OEMCP");
367+
if (!oemcp) {
368+
oemcp = "CP437\0"; // CP name is 6 chars max
369+
370+
const char *lc_to_cp_table[] = {
371+
"af_ZA", "CP850", "ar_SA", "CP720", "ar_LB", "CP720", "ar_EG", "CP720",
372+
"ar_DZ", "CP720", "ar_BH", "CP720", "ar_IQ", "CP720", "ar_JO", "CP720",
373+
"ar_KW", "CP720", "ar_LY", "CP720", "ar_MA", "CP720", "ar_OM", "CP720",
374+
"ar_QA", "CP720", "ar_SY", "CP720", "ar_TN", "CP720", "ar_AE", "CP720",
375+
"ar_YE", "CP720","ast_ES", "CP850", "az_AZ", "CP866", "az_AZ", "CP857",
376+
"be_BY", "CP866", "bg_BG", "CP866", "br_FR", "CP850", "ca_ES", "CP850",
377+
"zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP850", "cs_CZ", "CP852",
378+
"cy_GB", "CP850", "da_DK", "CP850", "de_AT", "CP850", "de_LI", "CP850",
379+
"de_LU", "CP850", "de_CH", "CP850", "de_DE", "CP850", "el_GR", "CP737",
380+
"en_AU", "CP850", "en_CA", "CP850", "en_GB", "CP850", "en_IE", "CP850",
381+
"en_JM", "CP850", "en_BZ", "CP850", "en_PH", "CP437", "en_ZA", "CP437",
382+
"en_TT", "CP850", "en_US", "CP437", "en_ZW", "CP437", "en_NZ", "CP850",
383+
"es_PA", "CP850", "es_BO", "CP850", "es_CR", "CP850", "es_DO", "CP850",
384+
"es_SV", "CP850", "es_EC", "CP850", "es_GT", "CP850", "es_HN", "CP850",
385+
"es_NI", "CP850", "es_CL", "CP850", "es_MX", "CP850", "es_ES", "CP850",
386+
"es_CO", "CP850", "es_ES", "CP850", "es_PE", "CP850", "es_AR", "CP850",
387+
"es_PR", "CP850", "es_VE", "CP850", "es_UY", "CP850", "es_PY", "CP850",
388+
"et_EE", "CP775", "eu_ES", "CP850", "fa_IR", "CP720", "fi_FI", "CP850",
389+
"fo_FO", "CP850", "fr_FR", "CP850", "fr_BE", "CP850", "fr_CA", "CP850",
390+
"fr_LU", "CP850", "fr_MC", "CP850", "fr_CH", "CP850", "ga_IE", "CP437",
391+
"gd_GB", "CP850", "gv_IM", "CP850", "gl_ES", "CP850", "he_IL", "CP862",
392+
"hr_HR", "CP852", "hu_HU", "CP852", "id_ID", "CP850", "is_IS", "CP850",
393+
"it_IT", "CP850", "it_CH", "CP850", "iv_IV", "CP437", "ja_JP", "CP932",
394+
"kk_KZ", "CP866", "ko_KR", "CP949", "ky_KG", "CP866", "lt_LT", "CP775",
395+
"lv_LV", "CP775", "mk_MK", "CP866", "mn_MN", "CP866", "ms_BN", "CP850",
396+
"ms_MY", "CP850", "nl_BE", "CP850", "nl_NL", "CP850", "nl_SR", "CP850",
397+
"nn_NO", "CP850", "nb_NO", "CP850", "pl_PL", "CP852", "pt_BR", "CP850",
398+
"pt_PT", "CP850", "rm_CH", "CP850", "ro_RO", "CP852", "ru_RU", "CP866",
399+
"sk_SK", "CP852", "sl_SI", "CP852", "sq_AL", "CP852", "sr_RS", "CP855",
400+
"sr_RS", "CP852", "sv_SE", "CP850", "sv_FI", "CP850", "sw_KE", "CP437",
401+
"th_TH", "CP874", "tr_TR", "CP857", "tt_RU", "CP866", "uk_UA", "CP866",
402+
"ur_PK", "CP720", "uz_UZ", "CP866", "uz_UZ", "CP857", "vi_VN", "CP1258",
403+
"wa_BE", "CP850", "zh_HK", "CP950", "zh_SG", "CP936"};
404+
int table_len = sizeof(lc_to_cp_table) / sizeof(char *);
405+
int lc_len, i;
406+
407+
char *lc = setlocale(LC_CTYPE, "");
408+
409+
if (lc && lc[0]) {
410+
// Compare up to the dot, if it exists, e.g. en_US.UTF-8
411+
for (lc_len = 0; lc[lc_len] != '.' && lc[lc_len] != '\0'; ++lc_len)
412+
;
413+
for (i = 0; i < table_len; i += 2)
414+
if (strncmp(lc, lc_to_cp_table[i], lc_len) == 0)
415+
oemcp = lc_to_cp_table[i + 1];
416+
}
417+
}
418+
419+
iconv_t cd;
420+
if ((cd = iconv_open("UTF-8", oemcp)) != (iconv_t)-1) {
421+
422+
AString s_utf8;
423+
const char* src = s.Ptr();
424+
size_t slen = s.Len();
425+
size_t dlen = slen * 4;
426+
const char* dest = s_utf8.GetBuf_SetEnd(dlen + 1); // (source length * 4) + null termination
427+
428+
size_t done = iconv(cd, (char**)&src, &slen, (char**)&dest, &dlen);
429+
bzero((size_t*)dest + done, 1);
430+
431+
iconv_close(cd);
432+
433+
if (ConvertUTF8ToUnicode(s_utf8, res) || ignore_Utf8_Errors)
434+
return;
435+
}
436+
}
437+
#endif
353438

354439
if (isUtf8)
355440
if (ConvertUTF8ToUnicode(s, res) || ignore_Utf8_Errors)

0 commit comments

Comments
 (0)