From ec02b3c434886f410a980582c2ff9affea911d06 Mon Sep 17 00:00:00 2001 From: Ivan Sorokin Date: Tue, 21 May 2024 01:54:15 +0200 Subject: [PATCH 01/14] Use system locale to select OEM/ANSI codepage for legacy zip archives --- CPP/7zip/Archive/Zip/ZipItem.cpp | 151 ++++++++++++++++++++++++++++++- 1 file changed, 149 insertions(+), 2 deletions(-) diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index cffbb78a4..ad0fd740c 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -1,5 +1,10 @@ // Archive/ZipItem.cpp +#ifndef _WIN32 +#include +#include +#endif + #include "StdAfx.h" #include "../../../../C/CpuArch.h" @@ -448,8 +453,150 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo } #endif } - - + + #ifndef _WIN32 + + // Convert OEM char set to UTF-8 if needed + // Use system locale to select code page + + // locale -> code page translation tables generated from Wine source code + + const char *lcToOemTable[] = { + "af_ZA", "CP850", "ar_SA", "CP720", "ar_LB", "CP720", "ar_EG", "CP720", + "ar_DZ", "CP720", "ar_BH", "CP720", "ar_IQ", "CP720", "ar_JO", "CP720", + "ar_KW", "CP720", "ar_LY", "CP720", "ar_MA", "CP720", "ar_OM", "CP720", + "ar_QA", "CP720", "ar_SY", "CP720", "ar_TN", "CP720", "ar_AE", "CP720", + "ar_YE", "CP720", "ast_ES", "CP850", "az_AZ", "CP866", "az_AZ", "CP857", + "be_BY", "CP866", "bg_BG", "CP866", "br_FR", "CP850", "ca_ES", "CP850", + "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP850", "cs_CZ", "CP852", + "cy_GB", "CP850", "da_DK", "CP850", "de_AT", "CP850", "de_LI", "CP850", + "de_LU", "CP850", "de_CH", "CP850", "de_DE", "CP850", "el_GR", "CP737", + "en_AU", "CP850", "en_CA", "CP850", "en_GB", "CP850", "en_IE", "CP850", + "en_JM", "CP850", "en_BZ", "CP850", "en_PH", "CP437", "en_ZA", "CP437", + "en_TT", "CP850", "en_US", "CP437", "en_ZW", "CP437", "en_NZ", "CP850", + "es_PA", "CP850", "es_BO", "CP850", "es_CR", "CP850", "es_DO", "CP850", + "es_SV", "CP850", "es_EC", "CP850", "es_GT", "CP850", "es_HN", "CP850", + "es_NI", "CP850", "es_CL", "CP850", "es_MX", "CP850", "es_ES", "CP850", + "es_CO", "CP850", "es_ES", "CP850", "es_PE", "CP850", "es_AR", "CP850", + "es_PR", "CP850", "es_VE", "CP850", "es_UY", "CP850", "es_PY", "CP850", + "et_EE", "CP775", "eu_ES", "CP850", "fa_IR", "CP720", "fi_FI", "CP850", + "fo_FO", "CP850", "fr_FR", "CP850", "fr_BE", "CP850", "fr_CA", "CP850", + "fr_LU", "CP850", "fr_MC", "CP850", "fr_CH", "CP850", "ga_IE", "CP437", + "gd_GB", "CP850", "gv_IM", "CP850", "gl_ES", "CP850", "he_IL", "CP862", + "hr_HR", "CP852", "hu_HU", "CP852", "id_ID", "CP850", "is_IS", "CP850", + "it_IT", "CP850", "it_CH", "CP850", "iv_IV", "CP437", "ja_JP", "CP932", + "kk_KZ", "CP866", "ko_KR", "CP949", "ky_KG", "CP866", "lt_LT", "CP775", + "lv_LV", "CP775", "mk_MK", "CP866", "mn_MN", "CP866", "ms_BN", "CP850", + "ms_MY", "CP850", "nl_BE", "CP850", "nl_NL", "CP850", "nl_SR", "CP850", + "nn_NO", "CP850", "nb_NO", "CP850", "pl_PL", "CP852", "pt_BR", "CP850", + "pt_PT", "CP850", "rm_CH", "CP850", "ro_RO", "CP852", "ru_RU", "CP866", + "sk_SK", "CP852", "sl_SI", "CP852", "sq_AL", "CP852", "sr_RS", "CP855", + "sr_RS", "CP852", "sv_SE", "CP850", "sv_FI", "CP850", "sw_KE", "CP437", + "th_TH", "CP874", "tr_TR", "CP857", "tt_RU", "CP866", "uk_UA", "CP866", + "ur_PK", "CP720", "uz_UZ", "CP866", "uz_UZ", "CP857", "vi_VN", "CP1258", + "wa_BE", "CP850", "zh_HK", "CP950", "zh_SG", "CP936"}; + + const char *lcToAnsiTable[] = { + "af_ZA", "CP1252", "ar_SA", "CP1256", "ar_LB", "CP1256", "ar_EG", "CP1256", + "ar_DZ", "CP1256", "ar_BH", "CP1256", "ar_IQ", "CP1256", "ar_JO", "CP1256", + "ar_KW", "CP1256", "ar_LY", "CP1256", "ar_MA", "CP1256", "ar_OM", "CP1256", + "ar_QA", "CP1256", "ar_SY", "CP1256", "ar_TN", "CP1256", "ar_AE", "CP1256", + "ar_YE", "CP1256","ast_ES", "CP1252", "az_AZ", "CP1251", "az_AZ", "CP1254", + "be_BY", "CP1251", "bg_BG", "CP1251", "br_FR", "CP1252", "ca_ES", "CP1252", + "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP1252", "cs_CZ", "CP1250", + "cy_GB", "CP1252", "da_DK", "CP1252", "de_AT", "CP1252", "de_LI", "CP1252", + "de_LU", "CP1252", "de_CH", "CP1252", "de_DE", "CP1252", "el_GR", "CP1253", + "en_AU", "CP1252", "en_CA", "CP1252", "en_GB", "CP1252", "en_IE", "CP1252", + "en_JM", "CP1252", "en_BZ", "CP1252", "en_PH", "CP1252", "en_ZA", "CP1252", + "en_TT", "CP1252", "en_US", "CP1252", "en_ZW", "CP1252", "en_NZ", "CP1252", + "es_PA", "CP1252", "es_BO", "CP1252", "es_CR", "CP1252", "es_DO", "CP1252", + "es_SV", "CP1252", "es_EC", "CP1252", "es_GT", "CP1252", "es_HN", "CP1252", + "es_NI", "CP1252", "es_CL", "CP1252", "es_MX", "CP1252", "es_ES", "CP1252", + "es_CO", "CP1252", "es_ES", "CP1252", "es_PE", "CP1252", "es_AR", "CP1252", + "es_PR", "CP1252", "es_VE", "CP1252", "es_UY", "CP1252", "es_PY", "CP1252", + "et_EE", "CP1257", "eu_ES", "CP1252", "fa_IR", "CP1256", "fi_FI", "CP1252", + "fo_FO", "CP1252", "fr_FR", "CP1252", "fr_BE", "CP1252", "fr_CA", "CP1252", + "fr_LU", "CP1252", "fr_MC", "CP1252", "fr_CH", "CP1252", "ga_IE", "CP1252", + "gd_GB", "CP1252", "gv_IM", "CP1252", "gl_ES", "CP1252", "he_IL", "CP1255", + "hr_HR", "CP1250", "hu_HU", "CP1250", "id_ID", "CP1252", "is_IS", "CP1252", + "it_IT", "CP1252", "it_CH", "CP1252", "iv_IV", "CP1252", "ja_JP", "CP932", + "kk_KZ", "CP1251", "ko_KR", "CP949", "ky_KG", "CP1251", "lt_LT", "CP1257", + "lv_LV", "CP1257", "mk_MK", "CP1251", "mn_MN", "CP1251", "ms_BN", "CP1252", + "ms_MY", "CP1252", "nl_BE", "CP1252", "nl_NL", "CP1252", "nl_SR", "CP1252", + "nn_NO", "CP1252", "nb_NO", "CP1252", "pl_PL", "CP1250", "pt_BR", "CP1252", + "pt_PT", "CP1252", "rm_CH", "CP1252", "ro_RO", "CP1250", "ru_RU", "CP1251", + "sk_SK", "CP1250", "sl_SI", "CP1250", "sq_AL", "CP1250", "sr_RS", "CP1251", + "sr_RS", "CP1250", "sv_SE", "CP1252", "sv_FI", "CP1252", "sw_KE", "CP1252", + "th_TH", "CP874", "tr_TR", "CP1254", "tt_RU", "CP1251", "uk_UA", "CP1251", + "ur_PK", "CP1256", "uz_UZ", "CP1251", "uz_UZ", "CP1254", "vi_VN", "CP1258", + "wa_BE", "CP1252", "zh_HK", "CP950", "zh_SG", "CP936"}; + + bool isAnsi = false; + bool isOem = false; + + if (!isUtf8 && + MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS && + MadeByVersion.Version >= 20) { + isAnsi = true; + } else if (!isUtf8 && + (MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS || + MadeByVersion.HostOS == NFileHeader::NHostOS::kFAT)) { + isOem = true; + } + + if (isOem || isAnsi) { + + const char *legacyCp = nullptr; + int tableLen = sizeof(isOem ? lcToOemTable : lcToAnsiTable) / sizeof(char *); + int lcLen = 0, i; + + // Detect required code page name from current locale + char *lc = setlocale(LC_CTYPE, ""); + + if (lc && lc[0]) { + // Compare up to the dot, if it exists, e.g. en_US.UTF-8 + for (lcLen = 0; lc[lcLen] != '.' && lc[lcLen] != '\0'; ++lcLen); + + for (i = 0; i < tableLen; i += 2) + if (strncmp(lc, (isOem ? lcToOemTable[i] : lcToAnsiTable[i]), lcLen) == 0) { + legacyCp = isOem ? lcToOemTable[i + 1] : lcToAnsiTable[i + 1]; + break; // Stop searching once a match is found + } + } + + if (legacyCp) { + iconv_t cd; + if ((cd = iconv_open("UTF-8", legacyCp)) != (iconv_t)-1) { + + AString s_utf8; + const char* src = s.Ptr(); + size_t slen = s.Len(); + size_t dlen = slen * 4; + char* dest = s_utf8.GetBuf_SetEnd(dlen + 1); // (source length * 4) + null termination + + char* srcPtr = const_cast(src); // iconv requires non-const input pointer + char* destPtr = dest; + size_t done = iconv(cd, &srcPtr, &slen, &destPtr, &dlen); + if (done == (size_t)-1) { + // Handle iconv error + iconv_close(cd); + // Add proper error handling or logging here + return; + } + + // Null-terminate the result + *destPtr = '\0'; + + iconv_close(cd); + + if (ConvertUTF8ToUnicode(s_utf8, res) /*|| ignore_Utf8_Errors*/) { + return; + } + } + } + } + #endif + if (isUtf8) { ConvertUTF8ToUnicode(s, res); From 48df95eb23f5babac56f25dfbe17f28d735c7299 Mon Sep 17 00:00:00 2001 From: Ivan Sorokin Date: Tue, 21 May 2024 21:23:29 +0200 Subject: [PATCH 02/14] clean up the code and fix some errors --- CPP/7zip/Archive/Zip/ZipItem.cpp | 1224 +++++++++++++++--------------- 1 file changed, 615 insertions(+), 609 deletions(-) diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index ad0fd740c..7d3218e41 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -1,609 +1,615 @@ -// Archive/ZipItem.cpp - -#ifndef _WIN32 -#include -#include -#endif - -#include "StdAfx.h" - -#include "../../../../C/CpuArch.h" -#include "../../../../C/7zCrc.h" - -#include "../../../Common/IntToString.h" -#include "../../../Common/MyLinux.h" -#include "../../../Common/StringConvert.h" - -#include "../../../Windows/PropVariantUtils.h" - -#include "../Common/ItemNameUtils.h" - -#include "ZipItem.h" - -namespace NArchive { -namespace NZip { - -using namespace NFileHeader; - - -/* -const char *k_SpecName_NTFS_STREAM = "@@NTFS@STREAM@"; -const char *k_SpecName_MAC_RESOURCE_FORK = "@@MAC@RESOURCE-FORK@"; -*/ - -static const CUInt32PCharPair g_ExtraTypes[] = -{ - { NExtraID::kZip64, "Zip64" }, - { NExtraID::kNTFS, "NTFS" }, - { NExtraID::kUnix0, "UNIX" }, - { NExtraID::kStrongEncrypt, "StrongCrypto" }, - { NExtraID::kUnixTime, "UT" }, - { NExtraID::kUnix1, "UX" }, - { NExtraID::kUnix2, "Ux" }, - { NExtraID::kUnixN, "ux" }, - { NExtraID::kIzUnicodeComment, "uc" }, - { NExtraID::kIzUnicodeName, "up" }, - { NExtraID::kIzNtSecurityDescriptor, "SD" }, - { NExtraID::kWzAES, "WzAES" }, - { NExtraID::kApkAlign, "ApkAlign" } -}; - -void CExtraSubBlock::PrintInfo(AString &s) const -{ - for (unsigned i = 0; i < ARRAY_SIZE(g_ExtraTypes); i++) - { - const CUInt32PCharPair &pair = g_ExtraTypes[i]; - if (pair.Value == ID) - { - s += pair.Name; - if (ID == NExtraID::kUnixTime) - { - if (Data.Size() >= 1) - { - s += ':'; - const Byte flags = Data[0]; - if (flags & 1) s += 'M'; - if (flags & 2) s += 'A'; - if (flags & 4) s += 'C'; - const UInt32 size = (UInt32)(Data.Size()) - 1; - if (size % 4 == 0) - { - s += ':'; - s.Add_UInt32(size / 4); - } - } - } - /* - if (ID == NExtraID::kApkAlign && Data.Size() >= 2) - { - char sz[32]; - sz[0] = ':'; - ConvertUInt32ToHex(GetUi16(Data), sz + 1); - s += sz; - for (unsigned j = 2; j < Data.Size(); j++) - { - char sz[32]; - sz[0] = '-'; - ConvertUInt32ToHex(Data[j], sz + 1); - s += sz; - } - } - */ - return; - } - } - { - char sz[32]; - sz[0] = '0'; - sz[1] = 'x'; - ConvertUInt32ToHex(ID, sz + 2); - s += sz; - } -} - - -void CExtraBlock::PrintInfo(AString &s) const -{ - if (Error) - s.Add_OptSpaced("Extra_ERROR"); - - if (MinorError) - s.Add_OptSpaced("Minor_Extra_ERROR"); - - if (IsZip64 || IsZip64_Error) - { - s.Add_OptSpaced("Zip64"); - if (IsZip64_Error) - s += "_ERROR"; - } - - FOR_VECTOR (i, SubBlocks) - { - s.Add_Space_if_NotEmpty(); - SubBlocks[i].PrintInfo(s); - } -} - - -bool CExtraSubBlock::ExtractNtfsTime(unsigned index, FILETIME &ft) const -{ - ft.dwHighDateTime = ft.dwLowDateTime = 0; - UInt32 size = (UInt32)Data.Size(); - if (ID != NExtraID::kNTFS || size < 32) - return false; - const Byte *p = (const Byte *)Data; - p += 4; // for reserved - size -= 4; - while (size > 4) - { - UInt16 tag = GetUi16(p); - unsigned attrSize = GetUi16(p + 2); - p += 4; - size -= 4; - if (attrSize > size) - attrSize = size; - - if (tag == NNtfsExtra::kTagTime && attrSize >= 24) - { - p += 8 * index; - ft.dwLowDateTime = GetUi32(p); - ft.dwHighDateTime = GetUi32(p + 4); - return true; - } - p += attrSize; - size -= attrSize; - } - return false; -} - -bool CExtraSubBlock::Extract_UnixTime(bool isCentral, unsigned index, UInt32 &res) const -{ - /* Info-Zip : - The central-header extra field contains the modification - time only, or no timestamp at all. - Size of Data is used to flag its presence or absence - If "Flags" indicates that Modtime is present in the local header - field, it MUST be present in the central header field, too - */ - - res = 0; - UInt32 size = (UInt32)Data.Size(); - if (ID != NExtraID::kUnixTime || size < 5) - return false; - const Byte *p = (const Byte *)Data; - const Byte flags = *p++; - size--; - if (isCentral) - { - if (index != NUnixTime::kMTime || - (flags & (1 << NUnixTime::kMTime)) == 0 || - size < 4) - return false; - res = GetUi32(p); - return true; - } - for (unsigned i = 0; i < 3; i++) - if ((flags & (1 << i)) != 0) - { - if (size < 4) - return false; - if (index == i) - { - res = GetUi32(p); - return true; - } - p += 4; - size -= 4; - } - return false; -} - - -// Info-ZIP's abandoned "Unix1 timestamps & owner ID info" - -bool CExtraSubBlock::Extract_Unix01_Time(unsigned index, UInt32 &res) const -{ - res = 0; - const unsigned offset = index * 4; - if (Data.Size() < offset + 4) - return false; - if (ID != NExtraID::kUnix0 && - ID != NExtraID::kUnix1) - return false; - const Byte *p = (const Byte *)Data + offset; - res = GetUi32(p); - return true; -} - -/* -// PKWARE's Unix "extra" is similar to Info-ZIP's abandoned "Unix1 timestamps" -bool CExtraSubBlock::Extract_Unix_Time(unsigned index, UInt32 &res) const -{ - res = 0; - const unsigned offset = index * 4; - if (ID != NExtraID::kUnix0 || Data.Size() < offset) - return false; - const Byte *p = (const Byte *)Data + offset; - res = GetUi32(p); - return true; -} -*/ - -bool CExtraBlock::GetNtfsTime(unsigned index, FILETIME &ft) const -{ - FOR_VECTOR (i, SubBlocks) - { - const CExtraSubBlock &sb = SubBlocks[i]; - if (sb.ID == NFileHeader::NExtraID::kNTFS) - return sb.ExtractNtfsTime(index, ft); - } - return false; -} - -bool CExtraBlock::GetUnixTime(bool isCentral, unsigned index, UInt32 &res) const -{ - { - FOR_VECTOR (i, SubBlocks) - { - const CExtraSubBlock &sb = SubBlocks[i]; - if (sb.ID == NFileHeader::NExtraID::kUnixTime) - return sb.Extract_UnixTime(isCentral, index, res); - } - } - - switch (index) - { - case NUnixTime::kMTime: index = NUnixExtra::kMTime; break; - case NUnixTime::kATime: index = NUnixExtra::kATime; break; - default: return false; - } - - { - FOR_VECTOR (i, SubBlocks) - { - const CExtraSubBlock &sb = SubBlocks[i]; - if (sb.ID == NFileHeader::NExtraID::kUnix0 || - sb.ID == NFileHeader::NExtraID::kUnix1) - return sb.Extract_Unix01_Time(index, res); - } - } - return false; -} - - -bool CLocalItem::IsDir() const -{ - return NItemName::HasTailSlash(Name, GetCodePage()); -} - -bool CItem::IsDir() const -{ - // FIXME: we can check InfoZip UTF-8 name at first. - if (NItemName::HasTailSlash(Name, GetCodePage())) - return true; - - Byte hostOS = GetHostOS(); - - if (Size == 0 && PackSize == 0 && !Name.IsEmpty() && Name.Back() == '\\') - { - // do we need to use CharPrevExA? - // .NET Framework 4.5 : System.IO.Compression::CreateFromDirectory() probably writes backslashes to headers? - // so we support that case - switch (hostOS) - { - case NHostOS::kFAT: - case NHostOS::kNTFS: - case NHostOS::kHPFS: - case NHostOS::kVFAT: - return true; - } - } - - if (!FromCentral) - return false; - - UInt16 highAttrib = (UInt16)((ExternalAttrib >> 16 ) & 0xFFFF); - - switch (hostOS) - { - case NHostOS::kAMIGA: - switch (highAttrib & NAmigaAttrib::kIFMT) - { - case NAmigaAttrib::kIFDIR: return true; - case NAmigaAttrib::kIFREG: return false; - default: return false; // change it throw kUnknownAttributes; - } - case NHostOS::kFAT: - case NHostOS::kNTFS: - case NHostOS::kHPFS: - case NHostOS::kVFAT: - return ((ExternalAttrib & FILE_ATTRIBUTE_DIRECTORY) != 0); - case NHostOS::kAtari: - case NHostOS::kMac: - case NHostOS::kVMS: - case NHostOS::kVM_CMS: - case NHostOS::kAcorn: - case NHostOS::kMVS: - return false; // change it throw kUnknownAttributes; - case NHostOS::kUnix: - return MY_LIN_S_ISDIR(highAttrib); - default: - return false; - } -} - -UInt32 CItem::GetWinAttrib() const -{ - UInt32 winAttrib = 0; - switch (GetHostOS()) - { - case NHostOS::kFAT: - case NHostOS::kNTFS: - if (FromCentral) - winAttrib = ExternalAttrib; - break; - case NHostOS::kUnix: - // do we need to clear 16 low bits in this case? - if (FromCentral) - { - /* - Some programs write posix attributes in high 16 bits of ExternalAttrib - Also some programs can write additional marker flag: - 0x8000 - p7zip - 0x4000 - Zip in MacOS - no marker - Info-Zip - - Client code has two options to detect posix field: - 1) check 0x8000 marker. In that case we must add 0x8000 marker here. - 2) check that high 4 bits (file type bits in posix field) of attributes are not zero. - */ - - winAttrib = ExternalAttrib & 0xFFFF0000; - - // #ifndef _WIN32 - winAttrib |= 0x8000; // add posix mode marker - // #endif - } - break; - } - if (IsDir()) // test it; - winAttrib |= FILE_ATTRIBUTE_DIRECTORY; - return winAttrib; -} - -bool CItem::GetPosixAttrib(UInt32 &attrib) const -{ - // some archivers can store PosixAttrib in high 16 bits even with HostOS=FAT. - if (FromCentral && GetHostOS() == NHostOS::kUnix) - { - attrib = ExternalAttrib >> 16; - return (attrib != 0); - } - attrib = 0; - if (IsDir()) - attrib = MY_LIN_S_IFDIR; - return false; -} - - -bool CExtraSubBlock::CheckIzUnicode(const AString &s) const -{ - size_t size = Data.Size(); - if (size < 1 + 4) - return false; - const Byte *p = (const Byte *)Data; - if (p[0] > 1) - return false; - if (CrcCalc(s, s.Len()) != GetUi32(p + 1)) - return false; - size -= 5; - p += 5; - for (size_t i = 0; i < size; i++) - if (p[i] == 0) - return false; - return Check_UTF8_Buf((const char *)(const void *)p, size, false); -} - - -void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, bool useSpecifiedCodePage, UINT codePage) const -{ - bool isUtf8 = IsUtf8(); - // bool ignore_Utf8_Errors = true; - - if (!isUtf8) - { - { - const unsigned id = isComment ? - NFileHeader::NExtraID::kIzUnicodeComment: - NFileHeader::NExtraID::kIzUnicodeName; - const CObjectVector &subBlocks = GetMainExtra().SubBlocks; - - FOR_VECTOR (i, subBlocks) - { - const CExtraSubBlock &sb = subBlocks[i]; - if (sb.ID == id) - { - if (sb.CheckIzUnicode(s)) - { - // const unsigned kIzUnicodeHeaderSize = 5; - if (Convert_UTF8_Buf_To_Unicode( - (const char *)(const void *)(const Byte *)sb.Data + 5, - sb.Data.Size() - 5, res)) - return; - } - break; - } - } - } - - if (useSpecifiedCodePage) - isUtf8 = (codePage == CP_UTF8); - #ifdef _WIN32 - else if (GetHostOS() == NFileHeader::NHostOS::kUnix) - { - /* Some ZIP archives in Unix use UTF-8 encoding without Utf8 flag in header. - We try to get name as UTF-8. - Do we need to do it in POSIX version also? */ - isUtf8 = true; - - /* 21.02: we want to ignore UTF-8 errors to support file paths that are mixed - of UTF-8 and non-UTF-8 characters. */ - // ignore_Utf8_Errors = false; - // ignore_Utf8_Errors = true; - } - #endif - } - - #ifndef _WIN32 - - // Convert OEM char set to UTF-8 if needed - // Use system locale to select code page - - // locale -> code page translation tables generated from Wine source code - - const char *lcToOemTable[] = { - "af_ZA", "CP850", "ar_SA", "CP720", "ar_LB", "CP720", "ar_EG", "CP720", - "ar_DZ", "CP720", "ar_BH", "CP720", "ar_IQ", "CP720", "ar_JO", "CP720", - "ar_KW", "CP720", "ar_LY", "CP720", "ar_MA", "CP720", "ar_OM", "CP720", - "ar_QA", "CP720", "ar_SY", "CP720", "ar_TN", "CP720", "ar_AE", "CP720", - "ar_YE", "CP720", "ast_ES", "CP850", "az_AZ", "CP866", "az_AZ", "CP857", - "be_BY", "CP866", "bg_BG", "CP866", "br_FR", "CP850", "ca_ES", "CP850", - "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP850", "cs_CZ", "CP852", - "cy_GB", "CP850", "da_DK", "CP850", "de_AT", "CP850", "de_LI", "CP850", - "de_LU", "CP850", "de_CH", "CP850", "de_DE", "CP850", "el_GR", "CP737", - "en_AU", "CP850", "en_CA", "CP850", "en_GB", "CP850", "en_IE", "CP850", - "en_JM", "CP850", "en_BZ", "CP850", "en_PH", "CP437", "en_ZA", "CP437", - "en_TT", "CP850", "en_US", "CP437", "en_ZW", "CP437", "en_NZ", "CP850", - "es_PA", "CP850", "es_BO", "CP850", "es_CR", "CP850", "es_DO", "CP850", - "es_SV", "CP850", "es_EC", "CP850", "es_GT", "CP850", "es_HN", "CP850", - "es_NI", "CP850", "es_CL", "CP850", "es_MX", "CP850", "es_ES", "CP850", - "es_CO", "CP850", "es_ES", "CP850", "es_PE", "CP850", "es_AR", "CP850", - "es_PR", "CP850", "es_VE", "CP850", "es_UY", "CP850", "es_PY", "CP850", - "et_EE", "CP775", "eu_ES", "CP850", "fa_IR", "CP720", "fi_FI", "CP850", - "fo_FO", "CP850", "fr_FR", "CP850", "fr_BE", "CP850", "fr_CA", "CP850", - "fr_LU", "CP850", "fr_MC", "CP850", "fr_CH", "CP850", "ga_IE", "CP437", - "gd_GB", "CP850", "gv_IM", "CP850", "gl_ES", "CP850", "he_IL", "CP862", - "hr_HR", "CP852", "hu_HU", "CP852", "id_ID", "CP850", "is_IS", "CP850", - "it_IT", "CP850", "it_CH", "CP850", "iv_IV", "CP437", "ja_JP", "CP932", - "kk_KZ", "CP866", "ko_KR", "CP949", "ky_KG", "CP866", "lt_LT", "CP775", - "lv_LV", "CP775", "mk_MK", "CP866", "mn_MN", "CP866", "ms_BN", "CP850", - "ms_MY", "CP850", "nl_BE", "CP850", "nl_NL", "CP850", "nl_SR", "CP850", - "nn_NO", "CP850", "nb_NO", "CP850", "pl_PL", "CP852", "pt_BR", "CP850", - "pt_PT", "CP850", "rm_CH", "CP850", "ro_RO", "CP852", "ru_RU", "CP866", - "sk_SK", "CP852", "sl_SI", "CP852", "sq_AL", "CP852", "sr_RS", "CP855", - "sr_RS", "CP852", "sv_SE", "CP850", "sv_FI", "CP850", "sw_KE", "CP437", - "th_TH", "CP874", "tr_TR", "CP857", "tt_RU", "CP866", "uk_UA", "CP866", - "ur_PK", "CP720", "uz_UZ", "CP866", "uz_UZ", "CP857", "vi_VN", "CP1258", - "wa_BE", "CP850", "zh_HK", "CP950", "zh_SG", "CP936"}; - - const char *lcToAnsiTable[] = { - "af_ZA", "CP1252", "ar_SA", "CP1256", "ar_LB", "CP1256", "ar_EG", "CP1256", - "ar_DZ", "CP1256", "ar_BH", "CP1256", "ar_IQ", "CP1256", "ar_JO", "CP1256", - "ar_KW", "CP1256", "ar_LY", "CP1256", "ar_MA", "CP1256", "ar_OM", "CP1256", - "ar_QA", "CP1256", "ar_SY", "CP1256", "ar_TN", "CP1256", "ar_AE", "CP1256", - "ar_YE", "CP1256","ast_ES", "CP1252", "az_AZ", "CP1251", "az_AZ", "CP1254", - "be_BY", "CP1251", "bg_BG", "CP1251", "br_FR", "CP1252", "ca_ES", "CP1252", - "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP1252", "cs_CZ", "CP1250", - "cy_GB", "CP1252", "da_DK", "CP1252", "de_AT", "CP1252", "de_LI", "CP1252", - "de_LU", "CP1252", "de_CH", "CP1252", "de_DE", "CP1252", "el_GR", "CP1253", - "en_AU", "CP1252", "en_CA", "CP1252", "en_GB", "CP1252", "en_IE", "CP1252", - "en_JM", "CP1252", "en_BZ", "CP1252", "en_PH", "CP1252", "en_ZA", "CP1252", - "en_TT", "CP1252", "en_US", "CP1252", "en_ZW", "CP1252", "en_NZ", "CP1252", - "es_PA", "CP1252", "es_BO", "CP1252", "es_CR", "CP1252", "es_DO", "CP1252", - "es_SV", "CP1252", "es_EC", "CP1252", "es_GT", "CP1252", "es_HN", "CP1252", - "es_NI", "CP1252", "es_CL", "CP1252", "es_MX", "CP1252", "es_ES", "CP1252", - "es_CO", "CP1252", "es_ES", "CP1252", "es_PE", "CP1252", "es_AR", "CP1252", - "es_PR", "CP1252", "es_VE", "CP1252", "es_UY", "CP1252", "es_PY", "CP1252", - "et_EE", "CP1257", "eu_ES", "CP1252", "fa_IR", "CP1256", "fi_FI", "CP1252", - "fo_FO", "CP1252", "fr_FR", "CP1252", "fr_BE", "CP1252", "fr_CA", "CP1252", - "fr_LU", "CP1252", "fr_MC", "CP1252", "fr_CH", "CP1252", "ga_IE", "CP1252", - "gd_GB", "CP1252", "gv_IM", "CP1252", "gl_ES", "CP1252", "he_IL", "CP1255", - "hr_HR", "CP1250", "hu_HU", "CP1250", "id_ID", "CP1252", "is_IS", "CP1252", - "it_IT", "CP1252", "it_CH", "CP1252", "iv_IV", "CP1252", "ja_JP", "CP932", - "kk_KZ", "CP1251", "ko_KR", "CP949", "ky_KG", "CP1251", "lt_LT", "CP1257", - "lv_LV", "CP1257", "mk_MK", "CP1251", "mn_MN", "CP1251", "ms_BN", "CP1252", - "ms_MY", "CP1252", "nl_BE", "CP1252", "nl_NL", "CP1252", "nl_SR", "CP1252", - "nn_NO", "CP1252", "nb_NO", "CP1252", "pl_PL", "CP1250", "pt_BR", "CP1252", - "pt_PT", "CP1252", "rm_CH", "CP1252", "ro_RO", "CP1250", "ru_RU", "CP1251", - "sk_SK", "CP1250", "sl_SI", "CP1250", "sq_AL", "CP1250", "sr_RS", "CP1251", - "sr_RS", "CP1250", "sv_SE", "CP1252", "sv_FI", "CP1252", "sw_KE", "CP1252", - "th_TH", "CP874", "tr_TR", "CP1254", "tt_RU", "CP1251", "uk_UA", "CP1251", - "ur_PK", "CP1256", "uz_UZ", "CP1251", "uz_UZ", "CP1254", "vi_VN", "CP1258", - "wa_BE", "CP1252", "zh_HK", "CP950", "zh_SG", "CP936"}; - - bool isAnsi = false; - bool isOem = false; - - if (!isUtf8 && - MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS && - MadeByVersion.Version >= 20) { - isAnsi = true; - } else if (!isUtf8 && - (MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS || - MadeByVersion.HostOS == NFileHeader::NHostOS::kFAT)) { - isOem = true; - } - - if (isOem || isAnsi) { - - const char *legacyCp = nullptr; - int tableLen = sizeof(isOem ? lcToOemTable : lcToAnsiTable) / sizeof(char *); - int lcLen = 0, i; - - // Detect required code page name from current locale - char *lc = setlocale(LC_CTYPE, ""); - - if (lc && lc[0]) { - // Compare up to the dot, if it exists, e.g. en_US.UTF-8 - for (lcLen = 0; lc[lcLen] != '.' && lc[lcLen] != '\0'; ++lcLen); - - for (i = 0; i < tableLen; i += 2) - if (strncmp(lc, (isOem ? lcToOemTable[i] : lcToAnsiTable[i]), lcLen) == 0) { - legacyCp = isOem ? lcToOemTable[i + 1] : lcToAnsiTable[i + 1]; - break; // Stop searching once a match is found - } - } - - if (legacyCp) { - iconv_t cd; - if ((cd = iconv_open("UTF-8", legacyCp)) != (iconv_t)-1) { - - AString s_utf8; - const char* src = s.Ptr(); - size_t slen = s.Len(); - size_t dlen = slen * 4; - char* dest = s_utf8.GetBuf_SetEnd(dlen + 1); // (source length * 4) + null termination - - char* srcPtr = const_cast(src); // iconv requires non-const input pointer - char* destPtr = dest; - size_t done = iconv(cd, &srcPtr, &slen, &destPtr, &dlen); - if (done == (size_t)-1) { - // Handle iconv error - iconv_close(cd); - // Add proper error handling or logging here - return; - } - - // Null-terminate the result - *destPtr = '\0'; - - iconv_close(cd); - - if (ConvertUTF8ToUnicode(s_utf8, res) /*|| ignore_Utf8_Errors*/) { - return; - } - } - } - } - #endif - - if (isUtf8) - { - ConvertUTF8ToUnicode(s, res); - return; - } - - MultiByteToUnicodeString2(res, s, useSpecifiedCodePage ? codePage : GetCodePage()); -} - -}} +// Archive/ZipItem.cpp + +#ifndef _WIN32 +#include +#include +#endif + +#include "StdAfx.h" + +#include "../../../../C/CpuArch.h" +#include "../../../../C/7zCrc.h" + +#include "../../../Common/IntToString.h" +#include "../../../Common/MyLinux.h" +#include "../../../Common/StringConvert.h" + +#include "../../../Windows/PropVariantUtils.h" + +#include "../Common/ItemNameUtils.h" + +#include "ZipItem.h" + +namespace NArchive { +namespace NZip { + +using namespace NFileHeader; + + +/* +const char *k_SpecName_NTFS_STREAM = "@@NTFS@STREAM@"; +const char *k_SpecName_MAC_RESOURCE_FORK = "@@MAC@RESOURCE-FORK@"; +*/ + +static const CUInt32PCharPair g_ExtraTypes[] = +{ + { NExtraID::kZip64, "Zip64" }, + { NExtraID::kNTFS, "NTFS" }, + { NExtraID::kUnix0, "UNIX" }, + { NExtraID::kStrongEncrypt, "StrongCrypto" }, + { NExtraID::kUnixTime, "UT" }, + { NExtraID::kUnix1, "UX" }, + { NExtraID::kUnix2, "Ux" }, + { NExtraID::kUnixN, "ux" }, + { NExtraID::kIzUnicodeComment, "uc" }, + { NExtraID::kIzUnicodeName, "up" }, + { NExtraID::kIzNtSecurityDescriptor, "SD" }, + { NExtraID::kWzAES, "WzAES" }, + { NExtraID::kApkAlign, "ApkAlign" } +}; + +void CExtraSubBlock::PrintInfo(AString &s) const +{ + for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_ExtraTypes); i++) + { + const CUInt32PCharPair &pair = g_ExtraTypes[i]; + if (pair.Value == ID) + { + s += pair.Name; + if (ID == NExtraID::kUnixTime) + { + if (Data.Size() >= 1) + { + s.Add_Colon(); + const Byte flags = Data[0]; + if (flags & 1) s.Add_Char('M'); + if (flags & 2) s.Add_Char('A'); + if (flags & 4) s.Add_Char('C'); + const UInt32 size = (UInt32)(Data.Size()) - 1; + if (size % 4 == 0) + { + s.Add_Colon(); + s.Add_UInt32(size / 4); + } + } + } + /* + if (ID == NExtraID::kApkAlign && Data.Size() >= 2) + { + char sz[32]; + sz[0] = ':'; + ConvertUInt32ToHex(GetUi16(Data), sz + 1); + s += sz; + for (unsigned j = 2; j < Data.Size(); j++) + { + char sz[32]; + sz[0] = '-'; + ConvertUInt32ToHex(Data[j], sz + 1); + s += sz; + } + } + */ + return; + } + } + { + char sz[16]; + sz[0] = '0'; + sz[1] = 'x'; + ConvertUInt32ToHex(ID, sz + 2); + s += sz; + } +} + + +void CExtraBlock::PrintInfo(AString &s) const +{ + if (Error) + s.Add_OptSpaced("Extra_ERROR"); + + if (MinorError) + s.Add_OptSpaced("Minor_Extra_ERROR"); + + if (IsZip64 || IsZip64_Error) + { + s.Add_OptSpaced("Zip64"); + if (IsZip64_Error) + s += "_ERROR"; + } + + FOR_VECTOR (i, SubBlocks) + { + s.Add_Space_if_NotEmpty(); + SubBlocks[i].PrintInfo(s); + } +} + + +bool CExtraSubBlock::ExtractNtfsTime(unsigned index, FILETIME &ft) const +{ + ft.dwHighDateTime = ft.dwLowDateTime = 0; + UInt32 size = (UInt32)Data.Size(); + if (ID != NExtraID::kNTFS || size < 32) + return false; + const Byte *p = (const Byte *)Data; + p += 4; // for reserved + size -= 4; + while (size > 4) + { + UInt16 tag = GetUi16(p); + unsigned attrSize = GetUi16(p + 2); + p += 4; + size -= 4; + if (attrSize > size) + attrSize = size; + + if (tag == NNtfsExtra::kTagTime && attrSize >= 24) + { + p += 8 * index; + ft.dwLowDateTime = GetUi32(p); + ft.dwHighDateTime = GetUi32(p + 4); + return true; + } + p += attrSize; + size -= attrSize; + } + return false; +} + +bool CExtraSubBlock::Extract_UnixTime(bool isCentral, unsigned index, UInt32 &res) const +{ + /* Info-Zip : + The central-header extra field contains the modification + time only, or no timestamp at all. + Size of Data is used to flag its presence or absence + If "Flags" indicates that Modtime is present in the local header + field, it MUST be present in the central header field, too + */ + + res = 0; + UInt32 size = (UInt32)Data.Size(); + if (ID != NExtraID::kUnixTime || size < 5) + return false; + const Byte *p = (const Byte *)Data; + const Byte flags = *p++; + size--; + if (isCentral) + { + if (index != NUnixTime::kMTime || + (flags & (1 << NUnixTime::kMTime)) == 0 || + size < 4) + return false; + res = GetUi32(p); + return true; + } + for (unsigned i = 0; i < 3; i++) + if ((flags & (1 << i)) != 0) + { + if (size < 4) + return false; + if (index == i) + { + res = GetUi32(p); + return true; + } + p += 4; + size -= 4; + } + return false; +} + + +// Info-ZIP's abandoned "Unix1 timestamps & owner ID info" + +bool CExtraSubBlock::Extract_Unix01_Time(unsigned index, UInt32 &res) const +{ + res = 0; + const unsigned offset = index * 4; + if (Data.Size() < offset + 4) + return false; + if (ID != NExtraID::kUnix0 && + ID != NExtraID::kUnix1) + return false; + const Byte *p = (const Byte *)Data + offset; + res = GetUi32(p); + return true; +} + +/* +// PKWARE's Unix "extra" is similar to Info-ZIP's abandoned "Unix1 timestamps" +bool CExtraSubBlock::Extract_Unix_Time(unsigned index, UInt32 &res) const +{ + res = 0; + const unsigned offset = index * 4; + if (ID != NExtraID::kUnix0 || Data.Size() < offset) + return false; + const Byte *p = (const Byte *)Data + offset; + res = GetUi32(p); + return true; +} +*/ + +bool CExtraBlock::GetNtfsTime(unsigned index, FILETIME &ft) const +{ + FOR_VECTOR (i, SubBlocks) + { + const CExtraSubBlock &sb = SubBlocks[i]; + if (sb.ID == NFileHeader::NExtraID::kNTFS) + return sb.ExtractNtfsTime(index, ft); + } + return false; +} + +bool CExtraBlock::GetUnixTime(bool isCentral, unsigned index, UInt32 &res) const +{ + { + FOR_VECTOR (i, SubBlocks) + { + const CExtraSubBlock &sb = SubBlocks[i]; + if (sb.ID == NFileHeader::NExtraID::kUnixTime) + return sb.Extract_UnixTime(isCentral, index, res); + } + } + + switch (index) + { + case NUnixTime::kMTime: index = NUnixExtra::kMTime; break; + case NUnixTime::kATime: index = NUnixExtra::kATime; break; + default: return false; + } + + { + FOR_VECTOR (i, SubBlocks) + { + const CExtraSubBlock &sb = SubBlocks[i]; + if (sb.ID == NFileHeader::NExtraID::kUnix0 || + sb.ID == NFileHeader::NExtraID::kUnix1) + return sb.Extract_Unix01_Time(index, res); + } + } + return false; +} + + +bool CLocalItem::IsDir() const +{ + return NItemName::HasTailSlash(Name, GetCodePage()); +} + +bool CItem::IsDir() const +{ + // FIXME: we can check InfoZip UTF-8 name at first. + if (NItemName::HasTailSlash(Name, GetCodePage())) + return true; + + Byte hostOS = GetHostOS(); + + if (Size == 0 && PackSize == 0 && !Name.IsEmpty() && Name.Back() == '\\') + { + // do we need to use CharPrevExA? + // .NET Framework 4.5 : System.IO.Compression::CreateFromDirectory() probably writes backslashes to headers? + // so we support that case + switch (hostOS) + { + case NHostOS::kFAT: + case NHostOS::kNTFS: + case NHostOS::kHPFS: + case NHostOS::kVFAT: + return true; + default: break; + } + } + + if (!FromCentral) + return false; + + UInt16 highAttrib = (UInt16)((ExternalAttrib >> 16 ) & 0xFFFF); + + switch (hostOS) + { + case NHostOS::kAMIGA: + switch (highAttrib & NAmigaAttrib::kIFMT) + { + case NAmigaAttrib::kIFDIR: return true; + case NAmigaAttrib::kIFREG: return false; + default: return false; // change it throw kUnknownAttributes; + } + case NHostOS::kFAT: + case NHostOS::kNTFS: + case NHostOS::kHPFS: + case NHostOS::kVFAT: + return ((ExternalAttrib & FILE_ATTRIBUTE_DIRECTORY) != 0); + case NHostOS::kAtari: + case NHostOS::kMac: + case NHostOS::kVMS: + case NHostOS::kVM_CMS: + case NHostOS::kAcorn: + case NHostOS::kMVS: + return false; // change it throw kUnknownAttributes; + case NHostOS::kUnix: + return MY_LIN_S_ISDIR(highAttrib); + default: + return false; + } +} + +UInt32 CItem::GetWinAttrib() const +{ + UInt32 winAttrib = 0; + switch (GetHostOS()) + { + case NHostOS::kFAT: + case NHostOS::kNTFS: + if (FromCentral) + winAttrib = ExternalAttrib; + break; + case NHostOS::kUnix: + // do we need to clear 16 low bits in this case? + if (FromCentral) + { + /* + Some programs write posix attributes in high 16 bits of ExternalAttrib + Also some programs can write additional marker flag: + 0x8000 - p7zip + 0x4000 - Zip in MacOS + no marker - Info-Zip + + Client code has two options to detect posix field: + 1) check 0x8000 marker. In that case we must add 0x8000 marker here. + 2) check that high 4 bits (file type bits in posix field) of attributes are not zero. + */ + + winAttrib = ExternalAttrib & 0xFFFF0000; + + // #ifndef _WIN32 + winAttrib |= 0x8000; // add posix mode marker + // #endif + } + break; + default: break; + } + if (IsDir()) // test it; + winAttrib |= FILE_ATTRIBUTE_DIRECTORY; + return winAttrib; +} + +bool CItem::GetPosixAttrib(UInt32 &attrib) const +{ + // some archivers can store PosixAttrib in high 16 bits even with HostOS=FAT. + if (FromCentral && GetHostOS() == NHostOS::kUnix) + { + attrib = ExternalAttrib >> 16; + return (attrib != 0); + } + attrib = 0; + if (IsDir()) + attrib = MY_LIN_S_IFDIR; + return false; +} + + +bool CExtraSubBlock::CheckIzUnicode(const AString &s) const +{ + size_t size = Data.Size(); + if (size < 1 + 4) + return false; + const Byte *p = (const Byte *)Data; + if (p[0] > 1) + return false; + if (CrcCalc(s, s.Len()) != GetUi32(p + 1)) + return false; + size -= 5; + p += 5; + for (size_t i = 0; i < size; i++) + if (p[i] == 0) + return false; + return Check_UTF8_Buf((const char *)(const void *)p, size, false); +} + + +void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, bool useSpecifiedCodePage, UINT codePage) const +{ + bool isUtf8 = IsUtf8(); + // bool ignore_Utf8_Errors = true; + + if (!isUtf8) + { + { + const unsigned id = isComment ? + NFileHeader::NExtraID::kIzUnicodeComment: + NFileHeader::NExtraID::kIzUnicodeName; + const CObjectVector &subBlocks = GetMainExtra().SubBlocks; + + FOR_VECTOR (i, subBlocks) + { + const CExtraSubBlock &sb = subBlocks[i]; + if (sb.ID == id) + { + if (sb.CheckIzUnicode(s)) + { + // const unsigned kIzUnicodeHeaderSize = 5; + if (Convert_UTF8_Buf_To_Unicode( + (const char *)(const void *)(const Byte *)sb.Data + 5, + sb.Data.Size() - 5, res)) + return; + } + break; + } + } + } + + if (useSpecifiedCodePage) + isUtf8 = (codePage == CP_UTF8); + #ifdef _WIN32 + else if (GetHostOS() == NFileHeader::NHostOS::kUnix) + { + /* Some ZIP archives in Unix use UTF-8 encoding without Utf8 flag in header. + We try to get name as UTF-8. + Do we need to do it in POSIX version also? */ + isUtf8 = true; + + /* 21.02: we want to ignore UTF-8 errors to support file paths that are mixed + of UTF-8 and non-UTF-8 characters. */ + // ignore_Utf8_Errors = false; + // ignore_Utf8_Errors = true; + } + #endif + } + + #ifndef _WIN32 + + // Convert OEM char set to UTF-8 if needed + // Use system locale to select code page + + // locale -> code page translation tables generated from Wine source code + + const char *lcToOemTable[] = { + "af_ZA", "CP850", "ar_SA", "CP720", "ar_LB", "CP720", "ar_EG", "CP720", + "ar_DZ", "CP720", "ar_BH", "CP720", "ar_IQ", "CP720", "ar_JO", "CP720", + "ar_KW", "CP720", "ar_LY", "CP720", "ar_MA", "CP720", "ar_OM", "CP720", + "ar_QA", "CP720", "ar_SY", "CP720", "ar_TN", "CP720", "ar_AE", "CP720", + "ar_YE", "CP720", "ast_ES", "CP850", "az_AZ", "CP866", "az_AZ", "CP857", + "be_BY", "CP866", "bg_BG", "CP866", "br_FR", "CP850", "ca_ES", "CP850", + "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP850", "cs_CZ", "CP852", + "cy_GB", "CP850", "da_DK", "CP850", "de_AT", "CP850", "de_LI", "CP850", + "de_LU", "CP850", "de_CH", "CP850", "de_DE", "CP850", "el_GR", "CP737", + "en_AU", "CP850", "en_CA", "CP850", "en_GB", "CP850", "en_IE", "CP850", + "en_JM", "CP850", "en_BZ", "CP850", "en_PH", "CP437", "en_ZA", "CP437", + "en_TT", "CP850", "en_US", "CP437", "en_ZW", "CP437", "en_NZ", "CP850", + "es_PA", "CP850", "es_BO", "CP850", "es_CR", "CP850", "es_DO", "CP850", + "es_SV", "CP850", "es_EC", "CP850", "es_GT", "CP850", "es_HN", "CP850", + "es_NI", "CP850", "es_CL", "CP850", "es_MX", "CP850", "es_ES", "CP850", + "es_CO", "CP850", "es_ES", "CP850", "es_PE", "CP850", "es_AR", "CP850", + "es_PR", "CP850", "es_VE", "CP850", "es_UY", "CP850", "es_PY", "CP850", + "et_EE", "CP775", "eu_ES", "CP850", "fa_IR", "CP720", "fi_FI", "CP850", + "fo_FO", "CP850", "fr_FR", "CP850", "fr_BE", "CP850", "fr_CA", "CP850", + "fr_LU", "CP850", "fr_MC", "CP850", "fr_CH", "CP850", "ga_IE", "CP437", + "gd_GB", "CP850", "gv_IM", "CP850", "gl_ES", "CP850", "he_IL", "CP862", + "hr_HR", "CP852", "hu_HU", "CP852", "id_ID", "CP850", "is_IS", "CP850", + "it_IT", "CP850", "it_CH", "CP850", "iv_IV", "CP437", "ja_JP", "CP932", + "kk_KZ", "CP866", "ko_KR", "CP949", "ky_KG", "CP866", "lt_LT", "CP775", + "lv_LV", "CP775", "mk_MK", "CP866", "mn_MN", "CP866", "ms_BN", "CP850", + "ms_MY", "CP850", "nl_BE", "CP850", "nl_NL", "CP850", "nl_SR", "CP850", + "nn_NO", "CP850", "nb_NO", "CP850", "pl_PL", "CP852", "pt_BR", "CP850", + "pt_PT", "CP850", "rm_CH", "CP850", "ro_RO", "CP852", "ru_RU", "CP866", + "sk_SK", "CP852", "sl_SI", "CP852", "sq_AL", "CP852", "sr_RS", "CP855", + "sr_RS", "CP852", "sv_SE", "CP850", "sv_FI", "CP850", "sw_KE", "CP437", + "th_TH", "CP874", "tr_TR", "CP857", "tt_RU", "CP866", "uk_UA", "CP866", + "ur_PK", "CP720", "uz_UZ", "CP866", "uz_UZ", "CP857", "vi_VN", "CP1258", + "wa_BE", "CP850", "zh_HK", "CP950", "zh_SG", "CP936"}; + + const char *lcToAnsiTable[] = { + "af_ZA", "CP1252", "ar_SA", "CP1256", "ar_LB", "CP1256", "ar_EG", "CP1256", + "ar_DZ", "CP1256", "ar_BH", "CP1256", "ar_IQ", "CP1256", "ar_JO", "CP1256", + "ar_KW", "CP1256", "ar_LY", "CP1256", "ar_MA", "CP1256", "ar_OM", "CP1256", + "ar_QA", "CP1256", "ar_SY", "CP1256", "ar_TN", "CP1256", "ar_AE", "CP1256", + "ar_YE", "CP1256","ast_ES", "CP1252", "az_AZ", "CP1251", "az_AZ", "CP1254", + "be_BY", "CP1251", "bg_BG", "CP1251", "br_FR", "CP1252", "ca_ES", "CP1252", + "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP1252", "cs_CZ", "CP1250", + "cy_GB", "CP1252", "da_DK", "CP1252", "de_AT", "CP1252", "de_LI", "CP1252", + "de_LU", "CP1252", "de_CH", "CP1252", "de_DE", "CP1252", "el_GR", "CP1253", + "en_AU", "CP1252", "en_CA", "CP1252", "en_GB", "CP1252", "en_IE", "CP1252", + "en_JM", "CP1252", "en_BZ", "CP1252", "en_PH", "CP1252", "en_ZA", "CP1252", + "en_TT", "CP1252", "en_US", "CP1252", "en_ZW", "CP1252", "en_NZ", "CP1252", + "es_PA", "CP1252", "es_BO", "CP1252", "es_CR", "CP1252", "es_DO", "CP1252", + "es_SV", "CP1252", "es_EC", "CP1252", "es_GT", "CP1252", "es_HN", "CP1252", + "es_NI", "CP1252", "es_CL", "CP1252", "es_MX", "CP1252", "es_ES", "CP1252", + "es_CO", "CP1252", "es_ES", "CP1252", "es_PE", "CP1252", "es_AR", "CP1252", + "es_PR", "CP1252", "es_VE", "CP1252", "es_UY", "CP1252", "es_PY", "CP1252", + "et_EE", "CP1257", "eu_ES", "CP1252", "fa_IR", "CP1256", "fi_FI", "CP1252", + "fo_FO", "CP1252", "fr_FR", "CP1252", "fr_BE", "CP1252", "fr_CA", "CP1252", + "fr_LU", "CP1252", "fr_MC", "CP1252", "fr_CH", "CP1252", "ga_IE", "CP1252", + "gd_GB", "CP1252", "gv_IM", "CP1252", "gl_ES", "CP1252", "he_IL", "CP1255", + "hr_HR", "CP1250", "hu_HU", "CP1250", "id_ID", "CP1252", "is_IS", "CP1252", + "it_IT", "CP1252", "it_CH", "CP1252", "iv_IV", "CP1252", "ja_JP", "CP932", + "kk_KZ", "CP1251", "ko_KR", "CP949", "ky_KG", "CP1251", "lt_LT", "CP1257", + "lv_LV", "CP1257", "mk_MK", "CP1251", "mn_MN", "CP1251", "ms_BN", "CP1252", + "ms_MY", "CP1252", "nl_BE", "CP1252", "nl_NL", "CP1252", "nl_SR", "CP1252", + "nn_NO", "CP1252", "nb_NO", "CP1252", "pl_PL", "CP1250", "pt_BR", "CP1252", + "pt_PT", "CP1252", "rm_CH", "CP1252", "ro_RO", "CP1250", "ru_RU", "CP1251", + "sk_SK", "CP1250", "sl_SI", "CP1250", "sq_AL", "CP1250", "sr_RS", "CP1251", + "sr_RS", "CP1250", "sv_SE", "CP1252", "sv_FI", "CP1252", "sw_KE", "CP1252", + "th_TH", "CP874", "tr_TR", "CP1254", "tt_RU", "CP1251", "uk_UA", "CP1251", + "ur_PK", "CP1256", "uz_UZ", "CP1251", "uz_UZ", "CP1254", "vi_VN", "CP1258", + "wa_BE", "CP1252", "zh_HK", "CP950", "zh_SG", "CP936"}; + + bool isAnsi = false; + bool isOem = false; + + if (!isUtf8 && + MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS && + MadeByVersion.Version >= 20) { + isAnsi = true; + } else if (!isUtf8 && + (MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS || + MadeByVersion.HostOS == NFileHeader::NHostOS::kFAT)) { + isOem = true; + } + + if (isOem || isAnsi) { + + const char *legacyCp = nullptr; + int tableLen = sizeof(isOem ? lcToOemTable : lcToAnsiTable) / sizeof(char *); + int lcLen = 0, i; + + // Detect required code page name from current locale + char *lc = setlocale(LC_CTYPE, ""); + + if (lc && lc[0]) { + // Compare up to the dot, if it exists, e.g. en_US.UTF-8 + for (lcLen = 0; lc[lcLen] != '.' && lc[lcLen] != ':' && lc[lcLen] != '\0'; ++lcLen); + + for (i = 0; i < tableLen; i += 2) + if (strncmp(lc, (isOem ? lcToOemTable[i] : lcToAnsiTable[i]), lcLen) == 0) { + legacyCp = isOem ? lcToOemTable[i + 1] : lcToAnsiTable[i + 1]; + break; // Stop searching once a match is found + } + } + + if (legacyCp) { + iconv_t cd; + if ((cd = iconv_open("UTF-8", legacyCp)) != (iconv_t)-1) { + + AString s_utf8; + + size_t slen = s.Len(); + char* src = s.Ptr_non_const(); + + size_t dlen = slen * 4 + 1; // (source length * 4) + null termination + char* dst = s_utf8.GetBuf_SetEnd(dlen); + + memset(dst, 0, dlen); + + size_t done = iconv(cd, &src, &slen, &dst, &dlen); + + if (done == (size_t)-1) { + iconv_close(cd); + + // iconv failed. Falling back to default behavior + MultiByteToUnicodeString2(res, s, useSpecifiedCodePage ? codePage : GetCodePage()); + return; + } + + // Null-terminate the result + *dst = '\0'; + + iconv_close(cd); + + if (ConvertUTF8ToUnicode(s_utf8, res) /*|| ignore_Utf8_Errors*/) { + return; + } + } + } + } + #endif + + if (isUtf8) + { + ConvertUTF8ToUnicode(s, res); + return; + } + + MultiByteToUnicodeString2(res, s, useSpecifiedCodePage ? codePage : GetCodePage()); +} + +}} From f017755b178b30e8591481b3c3c0d81d1d932d7c Mon Sep 17 00:00:00 2001 From: Ivan Sorokin Date: Tue, 21 May 2024 21:38:34 +0200 Subject: [PATCH 03/14] remove code commited by a mistake --- CPP/7zip/Archive/Zip/ZipItem.cpp | 1223 +++++++++++++++--------------- 1 file changed, 608 insertions(+), 615 deletions(-) diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index 7d3218e41..cc7908192 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -1,615 +1,608 @@ -// Archive/ZipItem.cpp - -#ifndef _WIN32 -#include -#include -#endif - -#include "StdAfx.h" - -#include "../../../../C/CpuArch.h" -#include "../../../../C/7zCrc.h" - -#include "../../../Common/IntToString.h" -#include "../../../Common/MyLinux.h" -#include "../../../Common/StringConvert.h" - -#include "../../../Windows/PropVariantUtils.h" - -#include "../Common/ItemNameUtils.h" - -#include "ZipItem.h" - -namespace NArchive { -namespace NZip { - -using namespace NFileHeader; - - -/* -const char *k_SpecName_NTFS_STREAM = "@@NTFS@STREAM@"; -const char *k_SpecName_MAC_RESOURCE_FORK = "@@MAC@RESOURCE-FORK@"; -*/ - -static const CUInt32PCharPair g_ExtraTypes[] = -{ - { NExtraID::kZip64, "Zip64" }, - { NExtraID::kNTFS, "NTFS" }, - { NExtraID::kUnix0, "UNIX" }, - { NExtraID::kStrongEncrypt, "StrongCrypto" }, - { NExtraID::kUnixTime, "UT" }, - { NExtraID::kUnix1, "UX" }, - { NExtraID::kUnix2, "Ux" }, - { NExtraID::kUnixN, "ux" }, - { NExtraID::kIzUnicodeComment, "uc" }, - { NExtraID::kIzUnicodeName, "up" }, - { NExtraID::kIzNtSecurityDescriptor, "SD" }, - { NExtraID::kWzAES, "WzAES" }, - { NExtraID::kApkAlign, "ApkAlign" } -}; - -void CExtraSubBlock::PrintInfo(AString &s) const -{ - for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_ExtraTypes); i++) - { - const CUInt32PCharPair &pair = g_ExtraTypes[i]; - if (pair.Value == ID) - { - s += pair.Name; - if (ID == NExtraID::kUnixTime) - { - if (Data.Size() >= 1) - { - s.Add_Colon(); - const Byte flags = Data[0]; - if (flags & 1) s.Add_Char('M'); - if (flags & 2) s.Add_Char('A'); - if (flags & 4) s.Add_Char('C'); - const UInt32 size = (UInt32)(Data.Size()) - 1; - if (size % 4 == 0) - { - s.Add_Colon(); - s.Add_UInt32(size / 4); - } - } - } - /* - if (ID == NExtraID::kApkAlign && Data.Size() >= 2) - { - char sz[32]; - sz[0] = ':'; - ConvertUInt32ToHex(GetUi16(Data), sz + 1); - s += sz; - for (unsigned j = 2; j < Data.Size(); j++) - { - char sz[32]; - sz[0] = '-'; - ConvertUInt32ToHex(Data[j], sz + 1); - s += sz; - } - } - */ - return; - } - } - { - char sz[16]; - sz[0] = '0'; - sz[1] = 'x'; - ConvertUInt32ToHex(ID, sz + 2); - s += sz; - } -} - - -void CExtraBlock::PrintInfo(AString &s) const -{ - if (Error) - s.Add_OptSpaced("Extra_ERROR"); - - if (MinorError) - s.Add_OptSpaced("Minor_Extra_ERROR"); - - if (IsZip64 || IsZip64_Error) - { - s.Add_OptSpaced("Zip64"); - if (IsZip64_Error) - s += "_ERROR"; - } - - FOR_VECTOR (i, SubBlocks) - { - s.Add_Space_if_NotEmpty(); - SubBlocks[i].PrintInfo(s); - } -} - - -bool CExtraSubBlock::ExtractNtfsTime(unsigned index, FILETIME &ft) const -{ - ft.dwHighDateTime = ft.dwLowDateTime = 0; - UInt32 size = (UInt32)Data.Size(); - if (ID != NExtraID::kNTFS || size < 32) - return false; - const Byte *p = (const Byte *)Data; - p += 4; // for reserved - size -= 4; - while (size > 4) - { - UInt16 tag = GetUi16(p); - unsigned attrSize = GetUi16(p + 2); - p += 4; - size -= 4; - if (attrSize > size) - attrSize = size; - - if (tag == NNtfsExtra::kTagTime && attrSize >= 24) - { - p += 8 * index; - ft.dwLowDateTime = GetUi32(p); - ft.dwHighDateTime = GetUi32(p + 4); - return true; - } - p += attrSize; - size -= attrSize; - } - return false; -} - -bool CExtraSubBlock::Extract_UnixTime(bool isCentral, unsigned index, UInt32 &res) const -{ - /* Info-Zip : - The central-header extra field contains the modification - time only, or no timestamp at all. - Size of Data is used to flag its presence or absence - If "Flags" indicates that Modtime is present in the local header - field, it MUST be present in the central header field, too - */ - - res = 0; - UInt32 size = (UInt32)Data.Size(); - if (ID != NExtraID::kUnixTime || size < 5) - return false; - const Byte *p = (const Byte *)Data; - const Byte flags = *p++; - size--; - if (isCentral) - { - if (index != NUnixTime::kMTime || - (flags & (1 << NUnixTime::kMTime)) == 0 || - size < 4) - return false; - res = GetUi32(p); - return true; - } - for (unsigned i = 0; i < 3; i++) - if ((flags & (1 << i)) != 0) - { - if (size < 4) - return false; - if (index == i) - { - res = GetUi32(p); - return true; - } - p += 4; - size -= 4; - } - return false; -} - - -// Info-ZIP's abandoned "Unix1 timestamps & owner ID info" - -bool CExtraSubBlock::Extract_Unix01_Time(unsigned index, UInt32 &res) const -{ - res = 0; - const unsigned offset = index * 4; - if (Data.Size() < offset + 4) - return false; - if (ID != NExtraID::kUnix0 && - ID != NExtraID::kUnix1) - return false; - const Byte *p = (const Byte *)Data + offset; - res = GetUi32(p); - return true; -} - -/* -// PKWARE's Unix "extra" is similar to Info-ZIP's abandoned "Unix1 timestamps" -bool CExtraSubBlock::Extract_Unix_Time(unsigned index, UInt32 &res) const -{ - res = 0; - const unsigned offset = index * 4; - if (ID != NExtraID::kUnix0 || Data.Size() < offset) - return false; - const Byte *p = (const Byte *)Data + offset; - res = GetUi32(p); - return true; -} -*/ - -bool CExtraBlock::GetNtfsTime(unsigned index, FILETIME &ft) const -{ - FOR_VECTOR (i, SubBlocks) - { - const CExtraSubBlock &sb = SubBlocks[i]; - if (sb.ID == NFileHeader::NExtraID::kNTFS) - return sb.ExtractNtfsTime(index, ft); - } - return false; -} - -bool CExtraBlock::GetUnixTime(bool isCentral, unsigned index, UInt32 &res) const -{ - { - FOR_VECTOR (i, SubBlocks) - { - const CExtraSubBlock &sb = SubBlocks[i]; - if (sb.ID == NFileHeader::NExtraID::kUnixTime) - return sb.Extract_UnixTime(isCentral, index, res); - } - } - - switch (index) - { - case NUnixTime::kMTime: index = NUnixExtra::kMTime; break; - case NUnixTime::kATime: index = NUnixExtra::kATime; break; - default: return false; - } - - { - FOR_VECTOR (i, SubBlocks) - { - const CExtraSubBlock &sb = SubBlocks[i]; - if (sb.ID == NFileHeader::NExtraID::kUnix0 || - sb.ID == NFileHeader::NExtraID::kUnix1) - return sb.Extract_Unix01_Time(index, res); - } - } - return false; -} - - -bool CLocalItem::IsDir() const -{ - return NItemName::HasTailSlash(Name, GetCodePage()); -} - -bool CItem::IsDir() const -{ - // FIXME: we can check InfoZip UTF-8 name at first. - if (NItemName::HasTailSlash(Name, GetCodePage())) - return true; - - Byte hostOS = GetHostOS(); - - if (Size == 0 && PackSize == 0 && !Name.IsEmpty() && Name.Back() == '\\') - { - // do we need to use CharPrevExA? - // .NET Framework 4.5 : System.IO.Compression::CreateFromDirectory() probably writes backslashes to headers? - // so we support that case - switch (hostOS) - { - case NHostOS::kFAT: - case NHostOS::kNTFS: - case NHostOS::kHPFS: - case NHostOS::kVFAT: - return true; - default: break; - } - } - - if (!FromCentral) - return false; - - UInt16 highAttrib = (UInt16)((ExternalAttrib >> 16 ) & 0xFFFF); - - switch (hostOS) - { - case NHostOS::kAMIGA: - switch (highAttrib & NAmigaAttrib::kIFMT) - { - case NAmigaAttrib::kIFDIR: return true; - case NAmigaAttrib::kIFREG: return false; - default: return false; // change it throw kUnknownAttributes; - } - case NHostOS::kFAT: - case NHostOS::kNTFS: - case NHostOS::kHPFS: - case NHostOS::kVFAT: - return ((ExternalAttrib & FILE_ATTRIBUTE_DIRECTORY) != 0); - case NHostOS::kAtari: - case NHostOS::kMac: - case NHostOS::kVMS: - case NHostOS::kVM_CMS: - case NHostOS::kAcorn: - case NHostOS::kMVS: - return false; // change it throw kUnknownAttributes; - case NHostOS::kUnix: - return MY_LIN_S_ISDIR(highAttrib); - default: - return false; - } -} - -UInt32 CItem::GetWinAttrib() const -{ - UInt32 winAttrib = 0; - switch (GetHostOS()) - { - case NHostOS::kFAT: - case NHostOS::kNTFS: - if (FromCentral) - winAttrib = ExternalAttrib; - break; - case NHostOS::kUnix: - // do we need to clear 16 low bits in this case? - if (FromCentral) - { - /* - Some programs write posix attributes in high 16 bits of ExternalAttrib - Also some programs can write additional marker flag: - 0x8000 - p7zip - 0x4000 - Zip in MacOS - no marker - Info-Zip - - Client code has two options to detect posix field: - 1) check 0x8000 marker. In that case we must add 0x8000 marker here. - 2) check that high 4 bits (file type bits in posix field) of attributes are not zero. - */ - - winAttrib = ExternalAttrib & 0xFFFF0000; - - // #ifndef _WIN32 - winAttrib |= 0x8000; // add posix mode marker - // #endif - } - break; - default: break; - } - if (IsDir()) // test it; - winAttrib |= FILE_ATTRIBUTE_DIRECTORY; - return winAttrib; -} - -bool CItem::GetPosixAttrib(UInt32 &attrib) const -{ - // some archivers can store PosixAttrib in high 16 bits even with HostOS=FAT. - if (FromCentral && GetHostOS() == NHostOS::kUnix) - { - attrib = ExternalAttrib >> 16; - return (attrib != 0); - } - attrib = 0; - if (IsDir()) - attrib = MY_LIN_S_IFDIR; - return false; -} - - -bool CExtraSubBlock::CheckIzUnicode(const AString &s) const -{ - size_t size = Data.Size(); - if (size < 1 + 4) - return false; - const Byte *p = (const Byte *)Data; - if (p[0] > 1) - return false; - if (CrcCalc(s, s.Len()) != GetUi32(p + 1)) - return false; - size -= 5; - p += 5; - for (size_t i = 0; i < size; i++) - if (p[i] == 0) - return false; - return Check_UTF8_Buf((const char *)(const void *)p, size, false); -} - - -void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, bool useSpecifiedCodePage, UINT codePage) const -{ - bool isUtf8 = IsUtf8(); - // bool ignore_Utf8_Errors = true; - - if (!isUtf8) - { - { - const unsigned id = isComment ? - NFileHeader::NExtraID::kIzUnicodeComment: - NFileHeader::NExtraID::kIzUnicodeName; - const CObjectVector &subBlocks = GetMainExtra().SubBlocks; - - FOR_VECTOR (i, subBlocks) - { - const CExtraSubBlock &sb = subBlocks[i]; - if (sb.ID == id) - { - if (sb.CheckIzUnicode(s)) - { - // const unsigned kIzUnicodeHeaderSize = 5; - if (Convert_UTF8_Buf_To_Unicode( - (const char *)(const void *)(const Byte *)sb.Data + 5, - sb.Data.Size() - 5, res)) - return; - } - break; - } - } - } - - if (useSpecifiedCodePage) - isUtf8 = (codePage == CP_UTF8); - #ifdef _WIN32 - else if (GetHostOS() == NFileHeader::NHostOS::kUnix) - { - /* Some ZIP archives in Unix use UTF-8 encoding without Utf8 flag in header. - We try to get name as UTF-8. - Do we need to do it in POSIX version also? */ - isUtf8 = true; - - /* 21.02: we want to ignore UTF-8 errors to support file paths that are mixed - of UTF-8 and non-UTF-8 characters. */ - // ignore_Utf8_Errors = false; - // ignore_Utf8_Errors = true; - } - #endif - } - - #ifndef _WIN32 - - // Convert OEM char set to UTF-8 if needed - // Use system locale to select code page - - // locale -> code page translation tables generated from Wine source code - - const char *lcToOemTable[] = { - "af_ZA", "CP850", "ar_SA", "CP720", "ar_LB", "CP720", "ar_EG", "CP720", - "ar_DZ", "CP720", "ar_BH", "CP720", "ar_IQ", "CP720", "ar_JO", "CP720", - "ar_KW", "CP720", "ar_LY", "CP720", "ar_MA", "CP720", "ar_OM", "CP720", - "ar_QA", "CP720", "ar_SY", "CP720", "ar_TN", "CP720", "ar_AE", "CP720", - "ar_YE", "CP720", "ast_ES", "CP850", "az_AZ", "CP866", "az_AZ", "CP857", - "be_BY", "CP866", "bg_BG", "CP866", "br_FR", "CP850", "ca_ES", "CP850", - "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP850", "cs_CZ", "CP852", - "cy_GB", "CP850", "da_DK", "CP850", "de_AT", "CP850", "de_LI", "CP850", - "de_LU", "CP850", "de_CH", "CP850", "de_DE", "CP850", "el_GR", "CP737", - "en_AU", "CP850", "en_CA", "CP850", "en_GB", "CP850", "en_IE", "CP850", - "en_JM", "CP850", "en_BZ", "CP850", "en_PH", "CP437", "en_ZA", "CP437", - "en_TT", "CP850", "en_US", "CP437", "en_ZW", "CP437", "en_NZ", "CP850", - "es_PA", "CP850", "es_BO", "CP850", "es_CR", "CP850", "es_DO", "CP850", - "es_SV", "CP850", "es_EC", "CP850", "es_GT", "CP850", "es_HN", "CP850", - "es_NI", "CP850", "es_CL", "CP850", "es_MX", "CP850", "es_ES", "CP850", - "es_CO", "CP850", "es_ES", "CP850", "es_PE", "CP850", "es_AR", "CP850", - "es_PR", "CP850", "es_VE", "CP850", "es_UY", "CP850", "es_PY", "CP850", - "et_EE", "CP775", "eu_ES", "CP850", "fa_IR", "CP720", "fi_FI", "CP850", - "fo_FO", "CP850", "fr_FR", "CP850", "fr_BE", "CP850", "fr_CA", "CP850", - "fr_LU", "CP850", "fr_MC", "CP850", "fr_CH", "CP850", "ga_IE", "CP437", - "gd_GB", "CP850", "gv_IM", "CP850", "gl_ES", "CP850", "he_IL", "CP862", - "hr_HR", "CP852", "hu_HU", "CP852", "id_ID", "CP850", "is_IS", "CP850", - "it_IT", "CP850", "it_CH", "CP850", "iv_IV", "CP437", "ja_JP", "CP932", - "kk_KZ", "CP866", "ko_KR", "CP949", "ky_KG", "CP866", "lt_LT", "CP775", - "lv_LV", "CP775", "mk_MK", "CP866", "mn_MN", "CP866", "ms_BN", "CP850", - "ms_MY", "CP850", "nl_BE", "CP850", "nl_NL", "CP850", "nl_SR", "CP850", - "nn_NO", "CP850", "nb_NO", "CP850", "pl_PL", "CP852", "pt_BR", "CP850", - "pt_PT", "CP850", "rm_CH", "CP850", "ro_RO", "CP852", "ru_RU", "CP866", - "sk_SK", "CP852", "sl_SI", "CP852", "sq_AL", "CP852", "sr_RS", "CP855", - "sr_RS", "CP852", "sv_SE", "CP850", "sv_FI", "CP850", "sw_KE", "CP437", - "th_TH", "CP874", "tr_TR", "CP857", "tt_RU", "CP866", "uk_UA", "CP866", - "ur_PK", "CP720", "uz_UZ", "CP866", "uz_UZ", "CP857", "vi_VN", "CP1258", - "wa_BE", "CP850", "zh_HK", "CP950", "zh_SG", "CP936"}; - - const char *lcToAnsiTable[] = { - "af_ZA", "CP1252", "ar_SA", "CP1256", "ar_LB", "CP1256", "ar_EG", "CP1256", - "ar_DZ", "CP1256", "ar_BH", "CP1256", "ar_IQ", "CP1256", "ar_JO", "CP1256", - "ar_KW", "CP1256", "ar_LY", "CP1256", "ar_MA", "CP1256", "ar_OM", "CP1256", - "ar_QA", "CP1256", "ar_SY", "CP1256", "ar_TN", "CP1256", "ar_AE", "CP1256", - "ar_YE", "CP1256","ast_ES", "CP1252", "az_AZ", "CP1251", "az_AZ", "CP1254", - "be_BY", "CP1251", "bg_BG", "CP1251", "br_FR", "CP1252", "ca_ES", "CP1252", - "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP1252", "cs_CZ", "CP1250", - "cy_GB", "CP1252", "da_DK", "CP1252", "de_AT", "CP1252", "de_LI", "CP1252", - "de_LU", "CP1252", "de_CH", "CP1252", "de_DE", "CP1252", "el_GR", "CP1253", - "en_AU", "CP1252", "en_CA", "CP1252", "en_GB", "CP1252", "en_IE", "CP1252", - "en_JM", "CP1252", "en_BZ", "CP1252", "en_PH", "CP1252", "en_ZA", "CP1252", - "en_TT", "CP1252", "en_US", "CP1252", "en_ZW", "CP1252", "en_NZ", "CP1252", - "es_PA", "CP1252", "es_BO", "CP1252", "es_CR", "CP1252", "es_DO", "CP1252", - "es_SV", "CP1252", "es_EC", "CP1252", "es_GT", "CP1252", "es_HN", "CP1252", - "es_NI", "CP1252", "es_CL", "CP1252", "es_MX", "CP1252", "es_ES", "CP1252", - "es_CO", "CP1252", "es_ES", "CP1252", "es_PE", "CP1252", "es_AR", "CP1252", - "es_PR", "CP1252", "es_VE", "CP1252", "es_UY", "CP1252", "es_PY", "CP1252", - "et_EE", "CP1257", "eu_ES", "CP1252", "fa_IR", "CP1256", "fi_FI", "CP1252", - "fo_FO", "CP1252", "fr_FR", "CP1252", "fr_BE", "CP1252", "fr_CA", "CP1252", - "fr_LU", "CP1252", "fr_MC", "CP1252", "fr_CH", "CP1252", "ga_IE", "CP1252", - "gd_GB", "CP1252", "gv_IM", "CP1252", "gl_ES", "CP1252", "he_IL", "CP1255", - "hr_HR", "CP1250", "hu_HU", "CP1250", "id_ID", "CP1252", "is_IS", "CP1252", - "it_IT", "CP1252", "it_CH", "CP1252", "iv_IV", "CP1252", "ja_JP", "CP932", - "kk_KZ", "CP1251", "ko_KR", "CP949", "ky_KG", "CP1251", "lt_LT", "CP1257", - "lv_LV", "CP1257", "mk_MK", "CP1251", "mn_MN", "CP1251", "ms_BN", "CP1252", - "ms_MY", "CP1252", "nl_BE", "CP1252", "nl_NL", "CP1252", "nl_SR", "CP1252", - "nn_NO", "CP1252", "nb_NO", "CP1252", "pl_PL", "CP1250", "pt_BR", "CP1252", - "pt_PT", "CP1252", "rm_CH", "CP1252", "ro_RO", "CP1250", "ru_RU", "CP1251", - "sk_SK", "CP1250", "sl_SI", "CP1250", "sq_AL", "CP1250", "sr_RS", "CP1251", - "sr_RS", "CP1250", "sv_SE", "CP1252", "sv_FI", "CP1252", "sw_KE", "CP1252", - "th_TH", "CP874", "tr_TR", "CP1254", "tt_RU", "CP1251", "uk_UA", "CP1251", - "ur_PK", "CP1256", "uz_UZ", "CP1251", "uz_UZ", "CP1254", "vi_VN", "CP1258", - "wa_BE", "CP1252", "zh_HK", "CP950", "zh_SG", "CP936"}; - - bool isAnsi = false; - bool isOem = false; - - if (!isUtf8 && - MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS && - MadeByVersion.Version >= 20) { - isAnsi = true; - } else if (!isUtf8 && - (MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS || - MadeByVersion.HostOS == NFileHeader::NHostOS::kFAT)) { - isOem = true; - } - - if (isOem || isAnsi) { - - const char *legacyCp = nullptr; - int tableLen = sizeof(isOem ? lcToOemTable : lcToAnsiTable) / sizeof(char *); - int lcLen = 0, i; - - // Detect required code page name from current locale - char *lc = setlocale(LC_CTYPE, ""); - - if (lc && lc[0]) { - // Compare up to the dot, if it exists, e.g. en_US.UTF-8 - for (lcLen = 0; lc[lcLen] != '.' && lc[lcLen] != ':' && lc[lcLen] != '\0'; ++lcLen); - - for (i = 0; i < tableLen; i += 2) - if (strncmp(lc, (isOem ? lcToOemTable[i] : lcToAnsiTable[i]), lcLen) == 0) { - legacyCp = isOem ? lcToOemTable[i + 1] : lcToAnsiTable[i + 1]; - break; // Stop searching once a match is found - } - } - - if (legacyCp) { - iconv_t cd; - if ((cd = iconv_open("UTF-8", legacyCp)) != (iconv_t)-1) { - - AString s_utf8; - - size_t slen = s.Len(); - char* src = s.Ptr_non_const(); - - size_t dlen = slen * 4 + 1; // (source length * 4) + null termination - char* dst = s_utf8.GetBuf_SetEnd(dlen); - - memset(dst, 0, dlen); - - size_t done = iconv(cd, &src, &slen, &dst, &dlen); - - if (done == (size_t)-1) { - iconv_close(cd); - - // iconv failed. Falling back to default behavior - MultiByteToUnicodeString2(res, s, useSpecifiedCodePage ? codePage : GetCodePage()); - return; - } - - // Null-terminate the result - *dst = '\0'; - - iconv_close(cd); - - if (ConvertUTF8ToUnicode(s_utf8, res) /*|| ignore_Utf8_Errors*/) { - return; - } - } - } - } - #endif - - if (isUtf8) - { - ConvertUTF8ToUnicode(s, res); - return; - } - - MultiByteToUnicodeString2(res, s, useSpecifiedCodePage ? codePage : GetCodePage()); -} - -}} +// Archive/ZipItem.cpp + +#include "StdAfx.h" + +#include "../../../../C/CpuArch.h" +#include "../../../../C/7zCrc.h" + +#include "../../../Common/IntToString.h" +#include "../../../Common/MyLinux.h" +#include "../../../Common/StringConvert.h" + +#include "../../../Windows/PropVariantUtils.h" + +#include "../Common/ItemNameUtils.h" + +#include "ZipItem.h" + +namespace NArchive { +namespace NZip { + +using namespace NFileHeader; + + +/* +const char *k_SpecName_NTFS_STREAM = "@@NTFS@STREAM@"; +const char *k_SpecName_MAC_RESOURCE_FORK = "@@MAC@RESOURCE-FORK@"; +*/ + +static const CUInt32PCharPair g_ExtraTypes[] = +{ + { NExtraID::kZip64, "Zip64" }, + { NExtraID::kNTFS, "NTFS" }, + { NExtraID::kUnix0, "UNIX" }, + { NExtraID::kStrongEncrypt, "StrongCrypto" }, + { NExtraID::kUnixTime, "UT" }, + { NExtraID::kUnix1, "UX" }, + { NExtraID::kUnix2, "Ux" }, + { NExtraID::kUnixN, "ux" }, + { NExtraID::kIzUnicodeComment, "uc" }, + { NExtraID::kIzUnicodeName, "up" }, + { NExtraID::kIzNtSecurityDescriptor, "SD" }, + { NExtraID::kWzAES, "WzAES" }, + { NExtraID::kApkAlign, "ApkAlign" } +}; + +void CExtraSubBlock::PrintInfo(AString &s) const +{ + for (unsigned i = 0; i < ARRAY_SIZE(g_ExtraTypes); i++) + { + const CUInt32PCharPair &pair = g_ExtraTypes[i]; + if (pair.Value == ID) + { + s += pair.Name; + if (ID == NExtraID::kUnixTime) + { + if (Data.Size() >= 1) + { + s += ':'; + const Byte flags = Data[0]; + if (flags & 1) s += 'M'; + if (flags & 2) s += 'A'; + if (flags & 4) s += 'C'; + const UInt32 size = (UInt32)(Data.Size()) - 1; + if (size % 4 == 0) + { + s += ':'; + s.Add_UInt32(size / 4); + } + } + } + /* + if (ID == NExtraID::kApkAlign && Data.Size() >= 2) + { + char sz[32]; + sz[0] = ':'; + ConvertUInt32ToHex(GetUi16(Data), sz + 1); + s += sz; + for (unsigned j = 2; j < Data.Size(); j++) + { + char sz[32]; + sz[0] = '-'; + ConvertUInt32ToHex(Data[j], sz + 1); + s += sz; + } + } + */ + return; + } + } + { + char sz[32]; + sz[0] = '0'; + sz[1] = 'x'; + ConvertUInt32ToHex(ID, sz + 2); + s += sz; + } +} + + +void CExtraBlock::PrintInfo(AString &s) const +{ + if (Error) + s.Add_OptSpaced("Extra_ERROR"); + + if (MinorError) + s.Add_OptSpaced("Minor_Extra_ERROR"); + + if (IsZip64 || IsZip64_Error) + { + s.Add_OptSpaced("Zip64"); + if (IsZip64_Error) + s += "_ERROR"; + } + + FOR_VECTOR (i, SubBlocks) + { + s.Add_Space_if_NotEmpty(); + SubBlocks[i].PrintInfo(s); + } +} + + +bool CExtraSubBlock::ExtractNtfsTime(unsigned index, FILETIME &ft) const +{ + ft.dwHighDateTime = ft.dwLowDateTime = 0; + UInt32 size = (UInt32)Data.Size(); + if (ID != NExtraID::kNTFS || size < 32) + return false; + const Byte *p = (const Byte *)Data; + p += 4; // for reserved + size -= 4; + while (size > 4) + { + UInt16 tag = GetUi16(p); + unsigned attrSize = GetUi16(p + 2); + p += 4; + size -= 4; + if (attrSize > size) + attrSize = size; + + if (tag == NNtfsExtra::kTagTime && attrSize >= 24) + { + p += 8 * index; + ft.dwLowDateTime = GetUi32(p); + ft.dwHighDateTime = GetUi32(p + 4); + return true; + } + p += attrSize; + size -= attrSize; + } + return false; +} + +bool CExtraSubBlock::Extract_UnixTime(bool isCentral, unsigned index, UInt32 &res) const +{ + /* Info-Zip : + The central-header extra field contains the modification + time only, or no timestamp at all. + Size of Data is used to flag its presence or absence + If "Flags" indicates that Modtime is present in the local header + field, it MUST be present in the central header field, too + */ + + res = 0; + UInt32 size = (UInt32)Data.Size(); + if (ID != NExtraID::kUnixTime || size < 5) + return false; + const Byte *p = (const Byte *)Data; + const Byte flags = *p++; + size--; + if (isCentral) + { + if (index != NUnixTime::kMTime || + (flags & (1 << NUnixTime::kMTime)) == 0 || + size < 4) + return false; + res = GetUi32(p); + return true; + } + for (unsigned i = 0; i < 3; i++) + if ((flags & (1 << i)) != 0) + { + if (size < 4) + return false; + if (index == i) + { + res = GetUi32(p); + return true; + } + p += 4; + size -= 4; + } + return false; +} + + +// Info-ZIP's abandoned "Unix1 timestamps & owner ID info" + +bool CExtraSubBlock::Extract_Unix01_Time(unsigned index, UInt32 &res) const +{ + res = 0; + const unsigned offset = index * 4; + if (Data.Size() < offset + 4) + return false; + if (ID != NExtraID::kUnix0 && + ID != NExtraID::kUnix1) + return false; + const Byte *p = (const Byte *)Data + offset; + res = GetUi32(p); + return true; +} + +/* +// PKWARE's Unix "extra" is similar to Info-ZIP's abandoned "Unix1 timestamps" +bool CExtraSubBlock::Extract_Unix_Time(unsigned index, UInt32 &res) const +{ + res = 0; + const unsigned offset = index * 4; + if (ID != NExtraID::kUnix0 || Data.Size() < offset) + return false; + const Byte *p = (const Byte *)Data + offset; + res = GetUi32(p); + return true; +} +*/ + +bool CExtraBlock::GetNtfsTime(unsigned index, FILETIME &ft) const +{ + FOR_VECTOR (i, SubBlocks) + { + const CExtraSubBlock &sb = SubBlocks[i]; + if (sb.ID == NFileHeader::NExtraID::kNTFS) + return sb.ExtractNtfsTime(index, ft); + } + return false; +} + +bool CExtraBlock::GetUnixTime(bool isCentral, unsigned index, UInt32 &res) const +{ + { + FOR_VECTOR (i, SubBlocks) + { + const CExtraSubBlock &sb = SubBlocks[i]; + if (sb.ID == NFileHeader::NExtraID::kUnixTime) + return sb.Extract_UnixTime(isCentral, index, res); + } + } + + switch (index) + { + case NUnixTime::kMTime: index = NUnixExtra::kMTime; break; + case NUnixTime::kATime: index = NUnixExtra::kATime; break; + default: return false; + } + + { + FOR_VECTOR (i, SubBlocks) + { + const CExtraSubBlock &sb = SubBlocks[i]; + if (sb.ID == NFileHeader::NExtraID::kUnix0 || + sb.ID == NFileHeader::NExtraID::kUnix1) + return sb.Extract_Unix01_Time(index, res); + } + } + return false; +} + + +bool CLocalItem::IsDir() const +{ + return NItemName::HasTailSlash(Name, GetCodePage()); +} + +bool CItem::IsDir() const +{ + // FIXME: we can check InfoZip UTF-8 name at first. + if (NItemName::HasTailSlash(Name, GetCodePage())) + return true; + + Byte hostOS = GetHostOS(); + + if (Size == 0 && PackSize == 0 && !Name.IsEmpty() && Name.Back() == '\\') + { + // do we need to use CharPrevExA? + // .NET Framework 4.5 : System.IO.Compression::CreateFromDirectory() probably writes backslashes to headers? + // so we support that case + switch (hostOS) + { + case NHostOS::kFAT: + case NHostOS::kNTFS: + case NHostOS::kHPFS: + case NHostOS::kVFAT: + return true; + } + } + + if (!FromCentral) + return false; + + UInt16 highAttrib = (UInt16)((ExternalAttrib >> 16 ) & 0xFFFF); + + switch (hostOS) + { + case NHostOS::kAMIGA: + switch (highAttrib & NAmigaAttrib::kIFMT) + { + case NAmigaAttrib::kIFDIR: return true; + case NAmigaAttrib::kIFREG: return false; + default: return false; // change it throw kUnknownAttributes; + } + case NHostOS::kFAT: + case NHostOS::kNTFS: + case NHostOS::kHPFS: + case NHostOS::kVFAT: + return ((ExternalAttrib & FILE_ATTRIBUTE_DIRECTORY) != 0); + case NHostOS::kAtari: + case NHostOS::kMac: + case NHostOS::kVMS: + case NHostOS::kVM_CMS: + case NHostOS::kAcorn: + case NHostOS::kMVS: + return false; // change it throw kUnknownAttributes; + case NHostOS::kUnix: + return MY_LIN_S_ISDIR(highAttrib); + default: + return false; + } +} + +UInt32 CItem::GetWinAttrib() const +{ + UInt32 winAttrib = 0; + switch (GetHostOS()) + { + case NHostOS::kFAT: + case NHostOS::kNTFS: + if (FromCentral) + winAttrib = ExternalAttrib; + break; + case NHostOS::kUnix: + // do we need to clear 16 low bits in this case? + if (FromCentral) + { + /* + Some programs write posix attributes in high 16 bits of ExternalAttrib + Also some programs can write additional marker flag: + 0x8000 - p7zip + 0x4000 - Zip in MacOS + no marker - Info-Zip + + Client code has two options to detect posix field: + 1) check 0x8000 marker. In that case we must add 0x8000 marker here. + 2) check that high 4 bits (file type bits in posix field) of attributes are not zero. + */ + + winAttrib = ExternalAttrib & 0xFFFF0000; + + // #ifndef _WIN32 + winAttrib |= 0x8000; // add posix mode marker + // #endif + } + break; + } + if (IsDir()) // test it; + winAttrib |= FILE_ATTRIBUTE_DIRECTORY; + return winAttrib; +} + +bool CItem::GetPosixAttrib(UInt32 &attrib) const +{ + // some archivers can store PosixAttrib in high 16 bits even with HostOS=FAT. + if (FromCentral && GetHostOS() == NHostOS::kUnix) + { + attrib = ExternalAttrib >> 16; + return (attrib != 0); + } + attrib = 0; + if (IsDir()) + attrib = MY_LIN_S_IFDIR; + return false; +} + + +bool CExtraSubBlock::CheckIzUnicode(const AString &s) const +{ + size_t size = Data.Size(); + if (size < 1 + 4) + return false; + const Byte *p = (const Byte *)Data; + if (p[0] > 1) + return false; + if (CrcCalc(s, s.Len()) != GetUi32(p + 1)) + return false; + size -= 5; + p += 5; + for (size_t i = 0; i < size; i++) + if (p[i] == 0) + return false; + return Check_UTF8_Buf((const char *)(const void *)p, size, false); +} + + +void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, bool useSpecifiedCodePage, UINT codePage) const +{ + bool isUtf8 = IsUtf8(); + // bool ignore_Utf8_Errors = true; + + if (!isUtf8) + { + { + const unsigned id = isComment ? + NFileHeader::NExtraID::kIzUnicodeComment: + NFileHeader::NExtraID::kIzUnicodeName; + const CObjectVector &subBlocks = GetMainExtra().SubBlocks; + + FOR_VECTOR (i, subBlocks) + { + const CExtraSubBlock &sb = subBlocks[i]; + if (sb.ID == id) + { + if (sb.CheckIzUnicode(s)) + { + // const unsigned kIzUnicodeHeaderSize = 5; + if (Convert_UTF8_Buf_To_Unicode( + (const char *)(const void *)(const Byte *)sb.Data + 5, + sb.Data.Size() - 5, res)) + return; + } + break; + } + } + } + + if (useSpecifiedCodePage) + isUtf8 = (codePage == CP_UTF8); + #ifdef _WIN32 + else if (GetHostOS() == NFileHeader::NHostOS::kUnix) + { + /* Some ZIP archives in Unix use UTF-8 encoding without Utf8 flag in header. + We try to get name as UTF-8. + Do we need to do it in POSIX version also? */ + isUtf8 = true; + + /* 21.02: we want to ignore UTF-8 errors to support file paths that are mixed + of UTF-8 and non-UTF-8 characters. */ + // ignore_Utf8_Errors = false; + // ignore_Utf8_Errors = true; + } + #endif + } + + #ifndef _WIN32 + + // Convert OEM char set to UTF-8 if needed + // Use system locale to select code page + + // locale -> code page translation tables generated from Wine source code + + const char *lcToOemTable[] = { + "af_ZA", "CP850", "ar_SA", "CP720", "ar_LB", "CP720", "ar_EG", "CP720", + "ar_DZ", "CP720", "ar_BH", "CP720", "ar_IQ", "CP720", "ar_JO", "CP720", + "ar_KW", "CP720", "ar_LY", "CP720", "ar_MA", "CP720", "ar_OM", "CP720", + "ar_QA", "CP720", "ar_SY", "CP720", "ar_TN", "CP720", "ar_AE", "CP720", + "ar_YE", "CP720", "ast_ES", "CP850", "az_AZ", "CP866", "az_AZ", "CP857", + "be_BY", "CP866", "bg_BG", "CP866", "br_FR", "CP850", "ca_ES", "CP850", + "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP850", "cs_CZ", "CP852", + "cy_GB", "CP850", "da_DK", "CP850", "de_AT", "CP850", "de_LI", "CP850", + "de_LU", "CP850", "de_CH", "CP850", "de_DE", "CP850", "el_GR", "CP737", + "en_AU", "CP850", "en_CA", "CP850", "en_GB", "CP850", "en_IE", "CP850", + "en_JM", "CP850", "en_BZ", "CP850", "en_PH", "CP437", "en_ZA", "CP437", + "en_TT", "CP850", "en_US", "CP437", "en_ZW", "CP437", "en_NZ", "CP850", + "es_PA", "CP850", "es_BO", "CP850", "es_CR", "CP850", "es_DO", "CP850", + "es_SV", "CP850", "es_EC", "CP850", "es_GT", "CP850", "es_HN", "CP850", + "es_NI", "CP850", "es_CL", "CP850", "es_MX", "CP850", "es_ES", "CP850", + "es_CO", "CP850", "es_ES", "CP850", "es_PE", "CP850", "es_AR", "CP850", + "es_PR", "CP850", "es_VE", "CP850", "es_UY", "CP850", "es_PY", "CP850", + "et_EE", "CP775", "eu_ES", "CP850", "fa_IR", "CP720", "fi_FI", "CP850", + "fo_FO", "CP850", "fr_FR", "CP850", "fr_BE", "CP850", "fr_CA", "CP850", + "fr_LU", "CP850", "fr_MC", "CP850", "fr_CH", "CP850", "ga_IE", "CP437", + "gd_GB", "CP850", "gv_IM", "CP850", "gl_ES", "CP850", "he_IL", "CP862", + "hr_HR", "CP852", "hu_HU", "CP852", "id_ID", "CP850", "is_IS", "CP850", + "it_IT", "CP850", "it_CH", "CP850", "iv_IV", "CP437", "ja_JP", "CP932", + "kk_KZ", "CP866", "ko_KR", "CP949", "ky_KG", "CP866", "lt_LT", "CP775", + "lv_LV", "CP775", "mk_MK", "CP866", "mn_MN", "CP866", "ms_BN", "CP850", + "ms_MY", "CP850", "nl_BE", "CP850", "nl_NL", "CP850", "nl_SR", "CP850", + "nn_NO", "CP850", "nb_NO", "CP850", "pl_PL", "CP852", "pt_BR", "CP850", + "pt_PT", "CP850", "rm_CH", "CP850", "ro_RO", "CP852", "ru_RU", "CP866", + "sk_SK", "CP852", "sl_SI", "CP852", "sq_AL", "CP852", "sr_RS", "CP855", + "sr_RS", "CP852", "sv_SE", "CP850", "sv_FI", "CP850", "sw_KE", "CP437", + "th_TH", "CP874", "tr_TR", "CP857", "tt_RU", "CP866", "uk_UA", "CP866", + "ur_PK", "CP720", "uz_UZ", "CP866", "uz_UZ", "CP857", "vi_VN", "CP1258", + "wa_BE", "CP850", "zh_HK", "CP950", "zh_SG", "CP936"}; + + const char *lcToAnsiTable[] = { + "af_ZA", "CP1252", "ar_SA", "CP1256", "ar_LB", "CP1256", "ar_EG", "CP1256", + "ar_DZ", "CP1256", "ar_BH", "CP1256", "ar_IQ", "CP1256", "ar_JO", "CP1256", + "ar_KW", "CP1256", "ar_LY", "CP1256", "ar_MA", "CP1256", "ar_OM", "CP1256", + "ar_QA", "CP1256", "ar_SY", "CP1256", "ar_TN", "CP1256", "ar_AE", "CP1256", + "ar_YE", "CP1256","ast_ES", "CP1252", "az_AZ", "CP1251", "az_AZ", "CP1254", + "be_BY", "CP1251", "bg_BG", "CP1251", "br_FR", "CP1252", "ca_ES", "CP1252", + "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP1252", "cs_CZ", "CP1250", + "cy_GB", "CP1252", "da_DK", "CP1252", "de_AT", "CP1252", "de_LI", "CP1252", + "de_LU", "CP1252", "de_CH", "CP1252", "de_DE", "CP1252", "el_GR", "CP1253", + "en_AU", "CP1252", "en_CA", "CP1252", "en_GB", "CP1252", "en_IE", "CP1252", + "en_JM", "CP1252", "en_BZ", "CP1252", "en_PH", "CP1252", "en_ZA", "CP1252", + "en_TT", "CP1252", "en_US", "CP1252", "en_ZW", "CP1252", "en_NZ", "CP1252", + "es_PA", "CP1252", "es_BO", "CP1252", "es_CR", "CP1252", "es_DO", "CP1252", + "es_SV", "CP1252", "es_EC", "CP1252", "es_GT", "CP1252", "es_HN", "CP1252", + "es_NI", "CP1252", "es_CL", "CP1252", "es_MX", "CP1252", "es_ES", "CP1252", + "es_CO", "CP1252", "es_ES", "CP1252", "es_PE", "CP1252", "es_AR", "CP1252", + "es_PR", "CP1252", "es_VE", "CP1252", "es_UY", "CP1252", "es_PY", "CP1252", + "et_EE", "CP1257", "eu_ES", "CP1252", "fa_IR", "CP1256", "fi_FI", "CP1252", + "fo_FO", "CP1252", "fr_FR", "CP1252", "fr_BE", "CP1252", "fr_CA", "CP1252", + "fr_LU", "CP1252", "fr_MC", "CP1252", "fr_CH", "CP1252", "ga_IE", "CP1252", + "gd_GB", "CP1252", "gv_IM", "CP1252", "gl_ES", "CP1252", "he_IL", "CP1255", + "hr_HR", "CP1250", "hu_HU", "CP1250", "id_ID", "CP1252", "is_IS", "CP1252", + "it_IT", "CP1252", "it_CH", "CP1252", "iv_IV", "CP1252", "ja_JP", "CP932", + "kk_KZ", "CP1251", "ko_KR", "CP949", "ky_KG", "CP1251", "lt_LT", "CP1257", + "lv_LV", "CP1257", "mk_MK", "CP1251", "mn_MN", "CP1251", "ms_BN", "CP1252", + "ms_MY", "CP1252", "nl_BE", "CP1252", "nl_NL", "CP1252", "nl_SR", "CP1252", + "nn_NO", "CP1252", "nb_NO", "CP1252", "pl_PL", "CP1250", "pt_BR", "CP1252", + "pt_PT", "CP1252", "rm_CH", "CP1252", "ro_RO", "CP1250", "ru_RU", "CP1251", + "sk_SK", "CP1250", "sl_SI", "CP1250", "sq_AL", "CP1250", "sr_RS", "CP1251", + "sr_RS", "CP1250", "sv_SE", "CP1252", "sv_FI", "CP1252", "sw_KE", "CP1252", + "th_TH", "CP874", "tr_TR", "CP1254", "tt_RU", "CP1251", "uk_UA", "CP1251", + "ur_PK", "CP1256", "uz_UZ", "CP1251", "uz_UZ", "CP1254", "vi_VN", "CP1258", + "wa_BE", "CP1252", "zh_HK", "CP950", "zh_SG", "CP936"}; + + bool isAnsi = false; + bool isOem = false; + + if (!isUtf8 && + MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS && + MadeByVersion.Version >= 20) { + isAnsi = true; + } else if (!isUtf8 && + (MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS || + MadeByVersion.HostOS == NFileHeader::NHostOS::kFAT)) { + isOem = true; + } + + if (isOem || isAnsi) { + + const char *legacyCp = nullptr; + int tableLen = sizeof(isOem ? lcToOemTable : lcToAnsiTable) / sizeof(char *); + int lcLen = 0, i; + + // Detect required code page name from current locale + char *lc = setlocale(LC_CTYPE, ""); + + if (lc && lc[0]) { + // Compare up to the dot, if it exists, e.g. en_US.UTF-8 + for (lcLen = 0; lc[lcLen] != '.' && lc[lcLen] != ':' && lc[lcLen] != '\0'; ++lcLen); + + for (i = 0; i < tableLen; i += 2) + if (strncmp(lc, (isOem ? lcToOemTable[i] : lcToAnsiTable[i]), lcLen) == 0) { + legacyCp = isOem ? lcToOemTable[i + 1] : lcToAnsiTable[i + 1]; + break; // Stop searching once a match is found + } + } + + if (legacyCp) { + iconv_t cd; + if ((cd = iconv_open("UTF-8", legacyCp)) != (iconv_t)-1) { + + AString s_utf8; + + size_t slen = s.Len(); + char* src = s.Ptr_non_const(); + + size_t dlen = slen * 4 + 1; // (source length * 4) + null termination + char* dst = s_utf8.GetBuf_SetEnd(dlen); + + memset(dst, 0, dlen); + + size_t done = iconv(cd, &src, &slen, &dst, &dlen); + + if (done == (size_t)-1) { + iconv_close(cd); + + // iconv failed. Falling back to default behavior + MultiByteToUnicodeString2(res, s, useSpecifiedCodePage ? codePage : GetCodePage()); + return; + } + + // Null-terminate the result + *dst = '\0'; + + iconv_close(cd); + + if (ConvertUTF8ToUnicode(s_utf8, res) /*|| ignore_Utf8_Errors*/) { + return; + } + } + } + } + #endif + + if (isUtf8) + { + ConvertUTF8ToUnicode(s, res); + return; + } + + MultiByteToUnicodeString2(res, s, useSpecifiedCodePage ? codePage : GetCodePage()); +} + +}} From 64c0aa784a17e80b44b0decb65931f7d30abae81 Mon Sep 17 00:00:00 2001 From: Ivan Sorokin Date: Tue, 21 May 2024 21:46:11 +0200 Subject: [PATCH 04/14] added missing headers --- CPP/7zip/Archive/Zip/ZipItem.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index cc7908192..63065bb93 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -1,5 +1,10 @@ // Archive/ZipItem.cpp +#ifndef _WIN32 +#include +#include +#endif + #include "StdAfx.h" #include "../../../../C/CpuArch.h" From 4815bdabffa04ef66cc6107ce088384e84630306 Mon Sep 17 00:00:00 2001 From: Ivan Sorokin Date: Wed, 22 May 2024 07:22:34 +0200 Subject: [PATCH 05/14] apply fix from https://github.com/p7zip-project/p7zip/issues/112#issuecomment-1509959881 --- CPP/7zip/Archive/Zip/ZipItem.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index 63065bb93..9122a5b93 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -575,6 +575,7 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo size_t dlen = slen * 4 + 1; // (source length * 4) + null termination char* dst = s_utf8.GetBuf_SetEnd(dlen); + const char* dstStart = dst; memset(dst, 0, dlen); @@ -593,9 +594,11 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo iconv_close(cd); - if (ConvertUTF8ToUnicode(s_utf8, res) /*|| ignore_Utf8_Errors*/) { + AString sUtf8CorrectLength; + unsigned dstCorrectLength = dst - dstStart; + sUtf8CorrectLength.SetFrom(sUtf8, dstCorrectLength); + if (ConvertUTF8ToUnicode(sUtf8CorrectLength, res) /*|| ignore_Utf8_Errors*/) return; - } } } } From fb026b73bd27b5d4694095f420aad162a6d882bd Mon Sep 17 00:00:00 2001 From: Ivan Sorokin Date: Wed, 22 May 2024 07:26:13 +0200 Subject: [PATCH 06/14] minor fix --- CPP/7zip/Archive/Zip/ZipItem.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index 9122a5b93..b8976b589 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -568,13 +568,13 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo iconv_t cd; if ((cd = iconv_open("UTF-8", legacyCp)) != (iconv_t)-1) { - AString s_utf8; + AString sUtf8; size_t slen = s.Len(); char* src = s.Ptr_non_const(); size_t dlen = slen * 4 + 1; // (source length * 4) + null termination - char* dst = s_utf8.GetBuf_SetEnd(dlen); + char* dst = sUtf8.GetBuf_SetEnd(dlen); const char* dstStart = dst; memset(dst, 0, dlen); From fc1e03a3cdfca39b4afa36db36c1947a09315c05 Mon Sep 17 00:00:00 2001 From: Ivan Sorokin Date: Wed, 22 May 2024 13:02:37 +0200 Subject: [PATCH 07/14] fixed warnings --- CPP/7zip/Archive/Zip/ZipItem.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index b8976b589..4d918ecc7 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -570,16 +570,18 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo AString sUtf8; - size_t slen = s.Len(); + unsigned slen = s.Len(); char* src = s.Ptr_non_const(); - size_t dlen = slen * 4 + 1; // (source length * 4) + null termination + unsigned dlen = slen * 4 + 1; // (source length * 4) + null termination char* dst = sUtf8.GetBuf_SetEnd(dlen); const char* dstStart = dst; memset(dst, 0, dlen); - size_t done = iconv(cd, &src, &slen, &dst, &dlen); + size_t slen_size_t = static_cast(slen); + size_t dlen_size_t = static_cast(dlen); + size_t done = iconv(cd, &src, &slen_size_t, &dst, &dlen_size_t); if (done == (size_t)-1) { iconv_close(cd); From 066d0dce415fa2c12aedcd1ca105d43c724414fe Mon Sep 17 00:00:00 2001 From: Ivan Sorokin Date: Wed, 22 May 2024 13:10:13 +0200 Subject: [PATCH 08/14] fixed one more warning --- CPP/7zip/Archive/Zip/ZipItem.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index 4d918ecc7..4e2ae4306 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -597,8 +597,8 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo iconv_close(cd); AString sUtf8CorrectLength; - unsigned dstCorrectLength = dst - dstStart; - sUtf8CorrectLength.SetFrom(sUtf8, dstCorrectLength); + size_t dstCorrectLength = dst - dstStart; + sUtf8CorrectLength.SetFrom(sUtf8, static_cast(dstCorrectLength)); if (ConvertUTF8ToUnicode(sUtf8CorrectLength, res) /*|| ignore_Utf8_Errors*/) return; } From 8fb8503a939db8df268ffb9e4f563ce0eb76e8f5 Mon Sep 17 00:00:00 2001 From: Ivan Sorokin Date: Thu, 23 May 2024 00:39:26 +0200 Subject: [PATCH 09/14] Fixed some minor errors: 1) Avoid possible problems with table size calculation 2) Assume CP437 if charset detection fails --- CPP/7zip/Archive/Zip/ZipItem.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index 4e2ae4306..dc061e6fd 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -547,7 +547,12 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo if (isOem || isAnsi) { const char *legacyCp = nullptr; - int tableLen = sizeof(isOem ? lcToOemTable : lcToAnsiTable) / sizeof(char *); + int tableLen; + if (isOem) { + tableLen = sizeof(lcToOemTable) / sizeof(lcToOemTable[0]); + } else { + tableLen = sizeof(lcToAnsiTable) / sizeof(lcToAnsiTable[0]); + } int lcLen = 0, i; // Detect required code page name from current locale @@ -564,6 +569,11 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo } } + // not found; use 437 by default + if (!legacyCp) { + legacyCp = "CP437"; + } + if (legacyCp) { iconv_t cd; if ((cd = iconv_open("UTF-8", legacyCp)) != (iconv_t)-1) { From 1a9bf116e8630116486c53bf3d413ad3725a9fe6 Mon Sep 17 00:00:00 2001 From: Ivan Sorokin Date: Thu, 23 May 2024 01:56:09 +0200 Subject: [PATCH 10/14] avoid compiler warnings --- CPP/7zip/Archive/Zip/ZipItem.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index dc061e6fd..8eede1159 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -547,12 +547,8 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo if (isOem || isAnsi) { const char *legacyCp = nullptr; - int tableLen; - if (isOem) { - tableLen = sizeof(lcToOemTable) / sizeof(lcToOemTable[0]); - } else { - tableLen = sizeof(lcToAnsiTable) / sizeof(lcToAnsiTable[0]); - } + // lcToOemTable and lcToAnsiTable should have equal size as locales list is the same + int tableLen = sizeof(lcToOemTable) / sizeof(lcToOemTable[0]); int lcLen = 0, i; // Detect required code page name from current locale From ba281a05560a438a72f81389107a048339158738 Mon Sep 17 00:00:00 2001 From: Ivan Sorokin Date: Sun, 26 May 2024 14:12:48 +0200 Subject: [PATCH 11/14] This code branch was actually unused --- CPP/7zip/Archive/Zip/ZipItem.cpp | 47 ++------------------------------ 1 file changed, 3 insertions(+), 44 deletions(-) diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index 8eede1159..5fbd4addb 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -496,58 +496,17 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo "ur_PK", "CP720", "uz_UZ", "CP866", "uz_UZ", "CP857", "vi_VN", "CP1258", "wa_BE", "CP850", "zh_HK", "CP950", "zh_SG", "CP936"}; - const char *lcToAnsiTable[] = { - "af_ZA", "CP1252", "ar_SA", "CP1256", "ar_LB", "CP1256", "ar_EG", "CP1256", - "ar_DZ", "CP1256", "ar_BH", "CP1256", "ar_IQ", "CP1256", "ar_JO", "CP1256", - "ar_KW", "CP1256", "ar_LY", "CP1256", "ar_MA", "CP1256", "ar_OM", "CP1256", - "ar_QA", "CP1256", "ar_SY", "CP1256", "ar_TN", "CP1256", "ar_AE", "CP1256", - "ar_YE", "CP1256","ast_ES", "CP1252", "az_AZ", "CP1251", "az_AZ", "CP1254", - "be_BY", "CP1251", "bg_BG", "CP1251", "br_FR", "CP1252", "ca_ES", "CP1252", - "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP1252", "cs_CZ", "CP1250", - "cy_GB", "CP1252", "da_DK", "CP1252", "de_AT", "CP1252", "de_LI", "CP1252", - "de_LU", "CP1252", "de_CH", "CP1252", "de_DE", "CP1252", "el_GR", "CP1253", - "en_AU", "CP1252", "en_CA", "CP1252", "en_GB", "CP1252", "en_IE", "CP1252", - "en_JM", "CP1252", "en_BZ", "CP1252", "en_PH", "CP1252", "en_ZA", "CP1252", - "en_TT", "CP1252", "en_US", "CP1252", "en_ZW", "CP1252", "en_NZ", "CP1252", - "es_PA", "CP1252", "es_BO", "CP1252", "es_CR", "CP1252", "es_DO", "CP1252", - "es_SV", "CP1252", "es_EC", "CP1252", "es_GT", "CP1252", "es_HN", "CP1252", - "es_NI", "CP1252", "es_CL", "CP1252", "es_MX", "CP1252", "es_ES", "CP1252", - "es_CO", "CP1252", "es_ES", "CP1252", "es_PE", "CP1252", "es_AR", "CP1252", - "es_PR", "CP1252", "es_VE", "CP1252", "es_UY", "CP1252", "es_PY", "CP1252", - "et_EE", "CP1257", "eu_ES", "CP1252", "fa_IR", "CP1256", "fi_FI", "CP1252", - "fo_FO", "CP1252", "fr_FR", "CP1252", "fr_BE", "CP1252", "fr_CA", "CP1252", - "fr_LU", "CP1252", "fr_MC", "CP1252", "fr_CH", "CP1252", "ga_IE", "CP1252", - "gd_GB", "CP1252", "gv_IM", "CP1252", "gl_ES", "CP1252", "he_IL", "CP1255", - "hr_HR", "CP1250", "hu_HU", "CP1250", "id_ID", "CP1252", "is_IS", "CP1252", - "it_IT", "CP1252", "it_CH", "CP1252", "iv_IV", "CP1252", "ja_JP", "CP932", - "kk_KZ", "CP1251", "ko_KR", "CP949", "ky_KG", "CP1251", "lt_LT", "CP1257", - "lv_LV", "CP1257", "mk_MK", "CP1251", "mn_MN", "CP1251", "ms_BN", "CP1252", - "ms_MY", "CP1252", "nl_BE", "CP1252", "nl_NL", "CP1252", "nl_SR", "CP1252", - "nn_NO", "CP1252", "nb_NO", "CP1252", "pl_PL", "CP1250", "pt_BR", "CP1252", - "pt_PT", "CP1252", "rm_CH", "CP1252", "ro_RO", "CP1250", "ru_RU", "CP1251", - "sk_SK", "CP1250", "sl_SI", "CP1250", "sq_AL", "CP1250", "sr_RS", "CP1251", - "sr_RS", "CP1250", "sv_SE", "CP1252", "sv_FI", "CP1252", "sw_KE", "CP1252", - "th_TH", "CP874", "tr_TR", "CP1254", "tt_RU", "CP1251", "uk_UA", "CP1251", - "ur_PK", "CP1256", "uz_UZ", "CP1251", "uz_UZ", "CP1254", "vi_VN", "CP1258", - "wa_BE", "CP1252", "zh_HK", "CP950", "zh_SG", "CP936"}; - - bool isAnsi = false; bool isOem = false; if (!isUtf8 && - MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS && - MadeByVersion.Version >= 20) { - isAnsi = true; - } else if (!isUtf8 && (MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS || MadeByVersion.HostOS == NFileHeader::NHostOS::kFAT)) { isOem = true; } - if (isOem || isAnsi) { + if (isOem) { const char *legacyCp = nullptr; - // lcToOemTable and lcToAnsiTable should have equal size as locales list is the same int tableLen = sizeof(lcToOemTable) / sizeof(lcToOemTable[0]); int lcLen = 0, i; @@ -559,8 +518,8 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo for (lcLen = 0; lc[lcLen] != '.' && lc[lcLen] != ':' && lc[lcLen] != '\0'; ++lcLen); for (i = 0; i < tableLen; i += 2) - if (strncmp(lc, (isOem ? lcToOemTable[i] : lcToAnsiTable[i]), lcLen) == 0) { - legacyCp = isOem ? lcToOemTable[i + 1] : lcToAnsiTable[i + 1]; + if (strncmp(lc, (lcToOemTable[i]), lcLen) == 0) { + legacyCp = lcToOemTable[i + 1]; break; // Stop searching once a match is found } } From 61f1a637e907774cd9a71beac70db850ca325554 Mon Sep 17 00:00:00 2001 From: Ivan Sorokin Date: Mon, 27 May 2024 15:07:03 +0200 Subject: [PATCH 12/14] added -mcp command line option support --- CPP/7zip/Archive/Zip/ZipItem.cpp | 70 +++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index 5fbd4addb..90d406074 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -496,6 +496,41 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo "ur_PK", "CP720", "uz_UZ", "CP866", "uz_UZ", "CP857", "vi_VN", "CP1258", "wa_BE", "CP850", "zh_HK", "CP950", "zh_SG", "CP936"}; + const char *lcToAnsiTable[] = { + "af_ZA", "CP1252", "ar_SA", "CP1256", "ar_LB", "CP1256", "ar_EG", "CP1256", + "ar_DZ", "CP1256", "ar_BH", "CP1256", "ar_IQ", "CP1256", "ar_JO", "CP1256", + "ar_KW", "CP1256", "ar_LY", "CP1256", "ar_MA", "CP1256", "ar_OM", "CP1256", + "ar_QA", "CP1256", "ar_SY", "CP1256", "ar_TN", "CP1256", "ar_AE", "CP1256", + "ar_YE", "CP1256","ast_ES", "CP1252", "az_AZ", "CP1251", "az_AZ", "CP1254", + "be_BY", "CP1251", "bg_BG", "CP1251", "br_FR", "CP1252", "ca_ES", "CP1252", + "zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP1252", "cs_CZ", "CP1250", + "cy_GB", "CP1252", "da_DK", "CP1252", "de_AT", "CP1252", "de_LI", "CP1252", + "de_LU", "CP1252", "de_CH", "CP1252", "de_DE", "CP1252", "el_GR", "CP1253", + "en_AU", "CP1252", "en_CA", "CP1252", "en_GB", "CP1252", "en_IE", "CP1252", + "en_JM", "CP1252", "en_BZ", "CP1252", "en_PH", "CP1252", "en_ZA", "CP1252", + "en_TT", "CP1252", "en_US", "CP1252", "en_ZW", "CP1252", "en_NZ", "CP1252", + "es_PA", "CP1252", "es_BO", "CP1252", "es_CR", "CP1252", "es_DO", "CP1252", + "es_SV", "CP1252", "es_EC", "CP1252", "es_GT", "CP1252", "es_HN", "CP1252", + "es_NI", "CP1252", "es_CL", "CP1252", "es_MX", "CP1252", "es_ES", "CP1252", + "es_CO", "CP1252", "es_ES", "CP1252", "es_PE", "CP1252", "es_AR", "CP1252", + "es_PR", "CP1252", "es_VE", "CP1252", "es_UY", "CP1252", "es_PY", "CP1252", + "et_EE", "CP1257", "eu_ES", "CP1252", "fa_IR", "CP1256", "fi_FI", "CP1252", + "fo_FO", "CP1252", "fr_FR", "CP1252", "fr_BE", "CP1252", "fr_CA", "CP1252", + "fr_LU", "CP1252", "fr_MC", "CP1252", "fr_CH", "CP1252", "ga_IE", "CP1252", + "gd_GB", "CP1252", "gv_IM", "CP1252", "gl_ES", "CP1252", "he_IL", "CP1255", + "hr_HR", "CP1250", "hu_HU", "CP1250", "id_ID", "CP1252", "is_IS", "CP1252", + "it_IT", "CP1252", "it_CH", "CP1252", "iv_IV", "CP1252", "ja_JP", "CP932", + "kk_KZ", "CP1251", "ko_KR", "CP949", "ky_KG", "CP1251", "lt_LT", "CP1257", + "lv_LV", "CP1257", "mk_MK", "CP1251", "mn_MN", "CP1251", "ms_BN", "CP1252", + "ms_MY", "CP1252", "nl_BE", "CP1252", "nl_NL", "CP1252", "nl_SR", "CP1252", + "nn_NO", "CP1252", "nb_NO", "CP1252", "pl_PL", "CP1250", "pt_BR", "CP1252", + "pt_PT", "CP1252", "rm_CH", "CP1252", "ro_RO", "CP1250", "ru_RU", "CP1251", + "sk_SK", "CP1250", "sl_SI", "CP1250", "sq_AL", "CP1250", "sr_RS", "CP1251", + "sr_RS", "CP1250", "sv_SE", "CP1252", "sv_FI", "CP1252", "sw_KE", "CP1252", + "th_TH", "CP874", "tr_TR", "CP1254", "tt_RU", "CP1251", "uk_UA", "CP1251", + "ur_PK", "CP1256", "uz_UZ", "CP1251", "uz_UZ", "CP1254", "vi_VN", "CP1258", + "wa_BE", "CP1252", "zh_HK", "CP950", "zh_SG", "CP936"}; + bool isOem = false; if (!isUtf8 && @@ -504,9 +539,11 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo isOem = true; } - if (isOem) { + const char *legacyCp = nullptr; + const char *legacyCpAnsi = nullptr; + + if (isOem || (useSpecifiedCodePage && (codePage != 65001))) { - const char *legacyCp = nullptr; int tableLen = sizeof(lcToOemTable) / sizeof(lcToOemTable[0]); int lcLen = 0, i; @@ -520,18 +557,31 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo for (i = 0; i < tableLen; i += 2) if (strncmp(lc, (lcToOemTable[i]), lcLen) == 0) { legacyCp = lcToOemTable[i + 1]; + legacyCpAnsi = lcToAnsiTable[i + 1]; break; // Stop searching once a match is found } - } - // not found; use 437 by default - if (!legacyCp) { - legacyCp = "CP437"; - } + if (!legacyCp) { + legacyCp = "CP437"; + legacyCpAnsi = "CP1252"; + } - if (legacyCp) { + char specCP[20]; + if (useSpecifiedCodePage) { + if (codePage == 0) { + strncpy(specCP, legacyCpAnsi, sizeof(legacyCpAnsi) - 1); + specCP[sizeof(legacyCpAnsi) - 1] = '\0'; + } + else if (codePage == 1) { + strncpy(specCP, legacyCp, sizeof(legacyCp) - 1); + specCP[sizeof(legacyCp) - 1] = '\0'; } + else { + snprintf(specCP, sizeof(specCP), "CP%d", codePage); + } + } + iconv_t cd; - if ((cd = iconv_open("UTF-8", legacyCp)) != (iconv_t)-1) { + if ((cd = iconv_open("UTF-8", useSpecifiedCodePage ? specCP : legacyCp)) != (iconv_t)-1) { AString sUtf8; @@ -570,7 +620,7 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo } } #endif - + if (isUtf8) { ConvertUTF8ToUnicode(s, res); From c09652776aebb9b21127ab14c1fd3eb2bba5a407 Mon Sep 17 00:00:00 2001 From: Ivan Sorokin Date: Mon, 27 May 2024 15:59:31 +0200 Subject: [PATCH 13/14] minor --- CPP/7zip/Archive/Zip/ZipItem.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index 90d406074..10153d681 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -3,6 +3,7 @@ #ifndef _WIN32 #include #include +#include #endif #include "StdAfx.h" From 1ce48da37ffc115e81e401f917028486c76620c9 Mon Sep 17 00:00:00 2001 From: Ivan Sorokin Date: Tue, 28 May 2024 01:49:14 +0200 Subject: [PATCH 14/14] Fix https://sourceforge.net/p/sevenzip/bugs/1060/ --- CPP/7zip/Archive/Zip/ZipItem.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index 10153d681..adaaa6b44 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -533,17 +533,23 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo "wa_BE", "CP1252", "zh_HK", "CP950", "zh_SG", "CP936"}; bool isOem = false; + bool isAnsi = false; if (!isUtf8 && - (MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS || - MadeByVersion.HostOS == NFileHeader::NHostOS::kFAT)) { + MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS && + MadeByVersion.Version >= 20) { + isAnsi = true; + } else + if (!isUtf8 && + (MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS || + MadeByVersion.HostOS == NFileHeader::NHostOS::kFAT)) { isOem = true; } const char *legacyCp = nullptr; const char *legacyCpAnsi = nullptr; - if (isOem || (useSpecifiedCodePage && (codePage != 65001))) { + if (isOem || isAnsi || (useSpecifiedCodePage && (codePage != 65001))) { int tableLen = sizeof(lcToOemTable) / sizeof(lcToOemTable[0]); int lcLen = 0, i; @@ -582,7 +588,7 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo } iconv_t cd; - if ((cd = iconv_open("UTF-8", useSpecifiedCodePage ? specCP : legacyCp)) != (iconv_t)-1) { + if ((cd = iconv_open("UTF-8", useSpecifiedCodePage ? specCP : (isOem ? legacyCp : legacyCpAnsi))) != (iconv_t)-1) { AString sUtf8;