Index: include/languages.php =================================================================== --- include/languages.php (revision 12904) +++ include/languages.php (working copy) @@ -102,14 +102,20 @@ $dir=bindtextdomain($domain,$dir); // set codeset in order to avoid gettext charset conversions - if (function_exists('bind_textdomain_codeset') - && isset($languages[$sm_notAlias]['CHARSET'])) { + if (function_exists('bind_textdomain_codeset')) { - // Japanese translation uses different internal charset - if ($sm_notAlias == 'ja_JP') { - bind_textdomain_codeset ($domain_name, 'EUC-JP'); - } else { + if (isset($languages[$sm_notAlias]['INTERNAL_CHARSET']) + && $languages[$sm_notAlias]['INTERNAL_CHARSET'] != '') { + + // some translations use different internal charset + bind_textdomain_codeset ($domain_name, + $languages[$sm_notAlias]['INTERNAL_CHARSET']); + + } else if (isset($languages[$sm_notAlias]['CHARSET']) + && $languages[$sm_notAlias]['CHARSET'] != '') { + bind_textdomain_codeset ($domain_name, $languages[$sm_notAlias]['CHARSET']); + } } @@ -475,12 +481,22 @@ putenv( "LANG=$longlocale" ); putenv( "LANGUAGE=$longlocale" ); putenv( "LC_NUMERIC=C" ); + /* + * FIXME: language-specific ones should be called with + * vars in locale/$lang/setup.php (or need some + * XTRA_CODE_code ?) + */ if ($sm_notAlias=='tr_TR') putenv( "LC_CTYPE=C" ); } // Workaround for plugins that use numbers with floating point // It might be removed if plugins use correct decimal delimiters // according to locale settings. setlocale(LC_NUMERIC, 'C'); + /* + * FIXME: language-specific ones should be called with + * vars in locale/$lang/setup.php (or need some + * XTRA_CODE_code ?) + */ // Workaround for specific Turkish strtolower/strtoupper rules. // Many functions expect English conversion rules. 
if ($sm_notAlias=='tr_TR') setlocale(LC_CTYPE,'C'); @@ -516,50 +532,15 @@ } $squirrelmail_language = $sm_notAlias; - if ($squirrelmail_language == 'ja_JP') { - header ('Content-Type: text/html; charset=EUC-JP'); - if (!function_exists('mb_internal_encoding')) { - // Error messages can't be displayed here - $error = 1; - // Revert to English if possible. - if (function_exists('setPref') && $username!='' && $data_dir!="") { - setPref($data_dir, $username, 'language', "en_US"); - $error = 2; - } - // stop further execution in order not to get php errors on mb_internal_encoding(). - return $error; - } - if (function_exists('mb_language')) { - mb_language('Japanese'); - } - mb_internal_encoding('EUC-JP'); - mb_http_output('pass'); + + if (isset($languages[$sm_notAlias]['XTRA_CODE']) + && function_exists($languages[$sm_notAlias]['XTRA_CODE'] . '_header')) { + return call_user_func($languages[$sm_notAlias]['XTRA_CODE'] . '_header'); } elseif ($squirrelmail_language == 'en_US') { header( 'Content-Type: text/html; charset=' . $default_charset ); } else { header( 'Content-Type: text/html; charset=' . $languages[$sm_notAlias]['CHARSET'] ); } - /** - * mbstring.func_overload fix (#929644). - * - * php mbstring extension can replace standard string functions with their multibyte - * equivalents. See http://www.php.net/ref.mbstring#mbstring.overload. This feature - * was added in php v.4.2.0 - * - * Some SquirrelMail functions work with 8bit strings in bytes. If interface is forced - * to use mbstring functions and mbstring internal encoding is set to multibyte charset, - * interface can't trust regular string functions. Due to mbstring overloading design - * limits php scripts can't control this setting. - * - * This hack should fix some issues related to 8bit strings in passwords. Correct fix is - * to disable mbstring overloading. Japanese translation uses different internal encoding. 
- */ - if ($squirrelmail_language != 'ja_JP' && - function_exists('mb_internal_encoding') && - check_php_version(4,2,0) && - (int)ini_get('mbstring.func_overload')!=0) { - mb_internal_encoding('pass'); - } } return 0; } @@ -1030,6 +1011,7 @@ * Possible 'variable' names: * NAME - Translation name in English * CHARSET - Encoding used by translation + * INTERNAL_CHARSET - (if translation need extra charset for internal use) * ALIAS - used when 'language' is only short name and 'value' should provide long language name * ALTNAME - Native translation name. Any 8bit symbols must be html encoded. * LOCALE - Full locale name (in xx_XX.charset format). It can use array with more than one locale name since 1.4.5 and 1.5.1 Index: src/compose.php =================================================================== --- src/compose.php (revision 12904) +++ src/compose.php (working copy) @@ -1221,36 +1221,27 @@ showComposeButtonRow(); } - $body_str = ''; - if ($use_signature == true && $newmail == true && !isset($from_htmladdr_search)) { - $signature = $idents[$identity]['signature']; + $body_str = htmlspecialchars(decodeHeader($body,false,false)); - if ($sig_first == '1') { - /* - * FIXME: test is specific to ja_JP translation implementation. - * This test might apply incorrect conversion to other translations, but - * use of 7bit iso-2022-jp charset in other translations might have other - * issues too. - */ - if ($default_charset == 'iso-2022-jp') { - $body_str = "\n\n".($prefix_sig==true? "-- \n":'').mb_convert_encoding($signature, 'EUC-JP'); - } else { - $body_str = "\n\n".($prefix_sig==true? "-- \n":'').decodeHeader($signature,false,false); - } - $body_str .= "\n\n".htmlspecialchars(decodeHeader($body,false,false)); - } else { - $body_str = "\n\n".htmlspecialchars(decodeHeader($body,false,false)); - // FIXME: test is specific to ja_JP translation implementation. See above comments. - if ($default_charset == 'iso-2022-jp') { - $body_str .= "\n\n".($prefix_sig==true? 
"-- \n":'').mb_convert_encoding($signature, 'EUC-JP'); - } else { - $body_str .= "\n\n".($prefix_sig==true? "-- \n":'').decodeHeader($signature,false,false); - } - } - } else { - $body_str = htmlspecialchars(decodeHeader($body,false,false)); - } + if ($use_signature == true && $newmail == true && !isset($from_htmladdr_search)) { + $signature = $idents[$identity]['signature']; + $sig_str = (($prefix_sig == true) ? "--\n" : ''); + + $charset = $languages[$squirrelmail_language]['INTERNAL_CHARSET']; + if (isset($charset)) { + $sig_str .= mb_convert_encoding($signature, $charset); + } else { + $sig_str .= decodeHeader($signature, false, false); + } + + if ($sig_first == '1') { + $body_str = "\n\n" . $sig_str . "\n\n" . $body_str; + } else { + $body_str = "\n\n" . $body_str . "\n\n" . $sig_str; + } + } + $oTemplate->assign('editor_width', (int)$editor_size); $oTemplate->assign('editor_height', (int)$editor_height); $oTemplate->assign('input_onfocus', 'onfocus="'.join(' ', $onfocus_array).'"'); Index: functions/strings.php =================================================================== --- functions/strings.php (revision 12904) +++ functions/strings.php (working copy) @@ -397,10 +397,16 @@ * @since 1.0 */ function sqUnWordWrap(&$body) { - global $squirrelmail_language; + global $squirrelmail_language, $languages; - if ($squirrelmail_language == 'ja_JP') { - return; + // Use custom unwrapping function, if translation provides it + if (isset($languages[$squirrelmail_language]['XTRA_CODE']) && + function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_unwordwrap')) { + if (mb_detect_encoding($body) != 'ASCII') { + $body = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . 
'_unwordwrap', + $body); + return; + } } $lines = explode("\n", $body); @@ -1097,23 +1103,14 @@ // default option if (is_null($charset)) return strlen($str); - // lowercase charset name - $charset=strtolower($charset); + // canonicalize and auto-detect (using cache) charset name + $charset = sq_get_auto_charset($charset); - // use automatic charset detection, if function call asks for it - if ($charset=='auto') { - global $default_charset, $squirrelmail_language; - set_my_charset(); - $charset=$default_charset; - if ($squirrelmail_language=='ja_JP') $charset='euc-jp'; - } - - // Use mbstring only with listed charsets - $aList_of_mb_charsets=array('utf-8','big5','gb2312','gb18030','euc-jp','euc-cn','euc-tw','euc-kr'); - // calculate string length according to charset - if (in_array($charset,$aList_of_mb_charsets) && in_array($charset,sq_mb_list_encodings())) { + if (sq_need_mb($charset)) { + $real_length = mb_strlen($str,$charset); + } else { // own strlen detection code is removed because missing strpos, // strtoupper and substr implementations break string wrapping. 
@@ -1168,33 +1165,10 @@ * @link http://www.php.net/mb_substr */ function sq_substr($string,$start,$length,$charset='auto') { - // use automatic charset detection, if function call asks for it - static $charset_auto, $bUse_mb; - if ($charset=='auto') { - if (!isset($charset_auto)) { - global $default_charset, $squirrelmail_language; - set_my_charset(); - $charset=$default_charset; - if ($squirrelmail_language=='ja_JP') $charset='euc-jp'; - $charset_auto = $charset; - } else { - $charset = $charset_auto; - } - } - $charset = strtolower($charset); + $charset = sq_get_auto_charset($charset); - // in_array call is expensive => do it once and use a static var for - // storing the results - if (!isset($bUse_mb)) { - if (in_array($charset,sq_mb_list_encodings())) { - $bUse_mb = true; - } else { - $bUse_mb = false; - } - } - - if ($bUse_mb) { + if (sq_need_mb($charset)) { return mb_substr($string,$start,$length,$charset); } // TODO: add mbstring independent code @@ -1216,32 +1190,10 @@ * @link http://www.php.net/mb_strpos */ function sq_strpos($haystack,$needle,$offset,$charset='auto') { - // use automatic charset detection, if function call asks for it - static $charset_auto, $bUse_mb; - if ($charset=='auto') { - if (!isset($charset_auto)) { - global $default_charset, $squirrelmail_language; - set_my_charset(); - $charset=$default_charset; - if ($squirrelmail_language=='ja_JP') $charset='euc-jp'; - $charset_auto = $charset; - } else { - $charset = $charset_auto; - } - } - $charset = strtolower($charset); + $charset = sq_get_auto_charset($charset); - // in_array call is expensive => do it once and use a static var for - // storing the results - if (!isset($bUse_mb)) { - if (in_array($charset,sq_mb_list_encodings())) { - $bUse_mb = true; - } else { - $bUse_mb = false; - } - } - if ($bUse_mb) { + if (sq_need_mb($charset)) { return mb_strpos($haystack,$needle,$offset,$charset); } // TODO: add mbstring independent code @@ -1261,36 +1213,12 @@ * @link 
http://www.php.net/mb_strtoupper */ function sq_strtoupper($string,$charset='auto') { - // use automatic charset detection, if function call asks for it - static $charset_auto, $bUse_mb; - if ($charset=='auto') { - if (!isset($charset_auto)) { - global $default_charset, $squirrelmail_language; - set_my_charset(); - $charset=$default_charset; - if ($squirrelmail_language=='ja_JP') $charset='euc-jp'; - $charset_auto = $charset; - } else { - $charset = $charset_auto; - } - } - $charset = strtolower($charset); + $charset = sq_get_auto_charset($charset); - // in_array call is expensive => do it once and use a static var for - // storing the results - if (!isset($bUse_mb)) { - if (function_exists('mb_strtoupper') && - in_array($charset,sq_mb_list_encodings())) { - $bUse_mb = true; - } else { - $bUse_mb = false; - } + if (sq_need_mb($charset)) { + return mb_strtoupper($string, $charset); } - - if ($bUse_mb) { - return mb_strtoupper($string,$charset); - } // TODO: add mbstring independent code // use vanilla string functions as last option @@ -1318,3 +1246,58 @@ function sq_trim_value ( &$value ) { $value = trim($value); } + +/* + * Resolves 'auto' (in any letter case) to the charset in use, cached after the first lookup. + * Always returns the charset name lower-cased. + */ +function sq_get_auto_charset($charset) +{ + static $charset_auto; + + if (strtolower($charset)=='auto') { + + if (!isset($charset_auto)) { + + global $default_charset; + global $squirrelmail_language, $languages; + + set_my_charset(); + $charset = $default_charset; + + /* a non-empty INTERNAL_CHARSET of the active language overrides the default charset. */ + if (!empty($languages[$squirrelmail_language]['INTERNAL_CHARSET'])) { + $charset = $languages[$squirrelmail_language]['INTERNAL_CHARSET']; + } + + $charset_auto = $charset; + + } else { + + /* use cache if possible. */ + $charset = $charset_auto; + + } + } + + return strtolower($charset); +} + +/* + * Checks if the charset needs mb_* functions. + * in_array() is expensive, so the result is cached when possible. 
+ */ +function sq_need_mb($charset) +{ + static $aUse_mb = array(); + /* one cached verdict per charset: callers may pass different charsets within a request */ + if (!isset($aUse_mb[$charset])) { + if (in_array($charset, sq_mb_list_encodings())) { + $aUse_mb[$charset] = true; + } else { + $aUse_mb[$charset] = false; + } + } + + return $aUse_mb[$charset]; +} Index: functions/page_header.php =================================================================== --- functions/page_header.php (revision 12904) +++ functions/page_header.php (working copy) @@ -96,17 +96,11 @@ $header_tags .= $oTemplate->fetch_right_to_left_stylesheet_link(); } - if ($squirrelmail_language == 'ja_JP') { - /* - * force correct detection of charset, when browser does not follow - * http content-type and tries to detect charset from page content. - * Shooting of browser's creator can't be implemented in php. - * We might get rid of it, if we follow http://www.w3.org/TR/japanese-xml/ - * recommendations and switch to unicode. - */ - $header_tags .= "\n"; - $header_tags .= '' . "\n"; - } + // Language specific header + if (isset($languages[$squirrelmail_language]['EXTRA_HEADER'])) { + $header_tags .= $languages[$squirrelmail_language]['EXTRA_HEADER']; + } + if ($do_hook) { // NOTE! plugins here MUST assign output to template // and NOT echo anything directly!! A common