Alternate encoding names recognized by .Net / IE
If you run the sample from https://msdn.microsoft.com/en-us/library/system.text.encoding.getencodings.aspx, you can get a list of the names that Microsoft .NET associates with each encoding/code page. (WebName is more consistent with what's used in charset.) For example:
using System;
using System.Text;
/// <summary>
/// Console sample that lists the encodings reported by
/// <see cref="Encoding.GetEncodings"/>.
/// </summary>
public class SamplesEncoding
{
    /// <summary>
    /// Writes one line per encoding to standard output: the code page
    /// identifier left-aligned in a six-character column, followed by the
    /// encoding's name.
    /// </summary>
    public static void Main()
    {
        // Only EncodingInfo metadata is printed, so the Encoding objects
        // themselves are never instantiated.
        foreach (EncodingInfo ei in Encoding.GetEncodings())
        {
            // WriteLine (not Write) so each encoding appears on its own line
            // instead of everything running together on a single line.
            Console.WriteLine("{0,-6} {1}", ei.CodePage, ei.Name);
        }
    }
}
However, Encoding.GetEncoding() also recognizes several other names, similar to what IE would recognize in a charset tag. I'm not sure if there's a way to get at the full list of aliases programmatically, but this is what you'd get for these input strings:
Label | Code Page |
---|---|
"437" | 437 |
"ANSI_X3.4-1968" | 20127 |
"ANSI_X3.4-1986" | 20127 |
"arabic" | 28596 |
"ascii" | 20127 |
"ASMO-708" | 708 |
"Big5" | 950 |
"Big5-HKSCS" | 950 |
"CCSID00858" | 858 |
"CCSID00924" | 20924 |
"CCSID01140" | 1140 |
"CCSID01141" | 1141 |
"CCSID01142" | 1142 |
"CCSID01143" | 1143 |
"CCSID01144" | 1144 |
"CCSID01145" | 1145 |
"CCSID01146" | 1146 |
"CCSID01147" | 1147 |
"CCSID01148" | 1148 |
"CCSID01149" | 1149 |
"chinese" | 936 |
"cn-big5" | 950 |
"CN-GB" | 936 |
"CP00858" | 858 |
"CP00924" | 20924 |
"CP01140" | 1140 |
"CP01141" | 1141 |
"CP01142" | 1142 |
"CP01143" | 1143 |
"CP01144" | 1144 |
"CP01145" | 1145 |
"CP01146" | 1146 |
"CP01147" | 1147 |
"CP01148" | 1148 |
"CP01149" | 1149 |
"cp037" | 37 |
"cp1025" | 21025 |
"CP1026" | 1026 |
"cp1256" | 1256 |
"CP273" | 20273 |
"CP278" | 20278 |
"CP280" | 20280 |
"CP284" | 20284 |
"CP285" | 20285 |
"cp290" | 20290 |
"cp297" | 20297 |
"cp367" | 20127 |
"cp420" | 20420 |
"cp423" | 20423 |
"cp424" | 20424 |
"cp437" | 437 |
"CP500" | 500 |
"cp50227" | 50227 |
"cp819" | 28591 |
"cp850" | 850 |
"cp852" | 852 |
"cp855" | 855 |
"cp857" | 857 |
"cp858" | 858 |
"cp860" | 860 |
"cp861" | 861 |
"cp862" | 862 |
"cp863" | 863 |
"cp864" | 864 |
"cp865" | 865 |
"cp866" | 866 |
"cp869" | 869 |
"CP870" | 870 |
"CP871" | 20871 |
"cp875" | 875 |
"cp880" | 20880 |
"CP905" | 20905 |
"csASCII" | 20127 |
"csbig5" | 950 |
"csEUCKR" | 51949 |
"csEUCPkdFmtJapanese" | 51932 |
"csGB2312" | 936 |
"csGB231280" | 936 |
"csIBM037" | 37 |
"csIBM1026" | 1026 |
"csIBM273" | 20273 |
"csIBM277" | 20277 |
"csIBM278" | 20278 |
"csIBM280" | 20280 |
"csIBM284" | 20284 |
"csIBM285" | 20285 |
"csIBM290" | 20290 |
"csIBM297" | 20297 |
"csIBM420" | 20420 |
"csIBM423" | 20423 |
"csIBM424" | 20424 |
"csIBM500" | 500 |
"csIBM870" | 870 |
"csIBM871" | 20871 |
"csIBM880" | 20880 |
"csIBM905" | 20905 |
"csIBMThai" | 20838 |
"csISO2022JP" | 50221 |
"csISO2022KR" | 50225 |
"csISO58GB231280" | 936 |
"csISOLatin1" | 28591 |
"csISOLatin2" | 28592 |
"csISOLatin3" | 28593 |
"csISOLatin4" | 28594 |
"csISOLatin5" | 28599 |
"csISOLatin9" | 28605 |
"csISOLatinArabic" | 28596 |
"csISOLatinCyrillic" | 28595 |
"csISOLatinGreek" | 28597 |
"csISOLatinHebrew" | 28598 |
"csKOI8R" | 20866 |
"csKSC56011987" | 949 |
"csPC8CodePage437" | 437 |
"csShiftJIS" | 932 |
"csUnicode11UTF7" | 65000 |
"csWindows31J" | 932 |
"cyrillic" | 28595 |
"DIN_66003" | 20106 |
"DOS-720" | 720 |
"DOS-862" | 862 |
"DOS-874" | 874 |
"ebcdic-cp-ar1" | 20420 |
"ebcdic-cp-be" | 500 |
"ebcdic-cp-ca" | 37 |
"ebcdic-cp-ch" | 500 |
"EBCDIC-CP-DK" | 20277 |
"ebcdic-cp-es" | 20284 |
"ebcdic-cp-fi" | 20278 |
"ebcdic-cp-fr" | 20297 |
"ebcdic-cp-gb" | 20285 |
"ebcdic-cp-gr" | 20423 |
"ebcdic-cp-he" | 20424 |
"ebcdic-cp-is" | 20871 |
"ebcdic-cp-it" | 20280 |
"ebcdic-cp-nl" | 37 |
"EBCDIC-CP-NO" | 20277 |
"ebcdic-cp-roece" | 870 |
"ebcdic-cp-se" | 20278 |
"ebcdic-cp-tr" | 20905 |
"ebcdic-cp-us" | 37 |
"ebcdic-cp-wt" | 37 |
"ebcdic-cp-yu" | 870 |
"EBCDIC-Cyrillic" | 20880 |
"ebcdic-de-273+euro" | 1141 |
"ebcdic-dk-277+euro" | 1142 |
"ebcdic-es-284+euro" | 1145 |
"ebcdic-fi-278+euro" | 1143 |
"ebcdic-fr-297+euro" | 1147 |
"ebcdic-gb-285+euro" | 1146 |
"ebcdic-international-500+euro" | 1148 |
"ebcdic-is-871+euro" | 1149 |
"ebcdic-it-280+euro" | 1144 |
"EBCDIC-JP-kana" | 20290 |
"ebcdic-Latin9--euro" | 20924 |
"ebcdic-no-277+euro" | 1142 |
"ebcdic-se-278+euro" | 1143 |
"ebcdic-us-37+euro" | 1140 |
"ECMA-114" | 28596 |
"ECMA-118" | 28597 |
"ELOT_928" | 28597 |
"euc-cn" | 51936 |
"euc-jp" | 51932 |
"euc-kr" | 51949 |
"Extended_UNIX_Code_Packed_Format_for_Japanese" | 51932 |
"GB18030" | 54936 |
"GB2312" | 936 |
"GB2312-80" | 936 |
"GB231280" | 936 |
"GBK" | 936 |
"GB_2312-80" | 936 |
"German" | 20106 |
"greek" | 28597 |
"greek8" | 28597 |
"hebrew" | 28598 |
"hz-gb-2312" | 52936 |
"IBM-Thai" | 20838 |
"IBM00858" | 858 |
"IBM00924" | 20924 |
"IBM01047" | 1047 |
"IBM01140" | 1140 |
"IBM01141" | 1141 |
"IBM01142" | 1142 |
"IBM01143" | 1143 |
"IBM01144" | 1144 |
"IBM01145" | 1145 |
"IBM01146" | 1146 |
"IBM01147" | 1147 |
"IBM01148" | 1148 |
"IBM01149" | 1149 |
"IBM037" | 37 |
"IBM1026" | 1026 |
"IBM273" | 20273 |
"IBM277" | 20277 |
"IBM278" | 20278 |
"IBM280" | 20280 |
"IBM284" | 20284 |
"IBM285" | 20285 |
"IBM290" | 20290 |
"IBM297" | 20297 |
"IBM367" | 20127 |
"IBM420" | 20420 |
"IBM423" | 20423 |
"IBM424" | 20424 |
"IBM437" | 437 |
"IBM500" | 500 |
"ibm737" | 737 |
"ibm775" | 775 |
"ibm819" | 28591 |
"IBM850" | 850 |
"IBM852" | 852 |
"IBM855" | 855 |
"IBM857" | 857 |
"IBM860" | 860 |
"IBM861" | 861 |
"IBM862" | 862 |
"IBM863" | 863 |
"IBM864" | 864 |
"IBM865" | 865 |
"IBM866" | 866 |
"IBM869" | 869 |
"IBM870" | 870 |
"IBM871" | 20871 |
"IBM880" | 20880 |
"IBM905" | 20905 |
"irv" | 20105 |
"ISO-10646-UCS-2" | 1200 |
"iso-2022-jp" | 50220 |
"iso-2022-jpeuc" | 51932 |
"iso-2022-kr" | 50225 |
"iso-2022-kr-7" | 50225 |
"iso-2022-kr-7bit" | 50225 |
"iso-2022-kr-8" | 51949 |
"iso-2022-kr-8bit" | 51949 |
"iso-8859-1" | 28591 |
"iso-8859-11" | 874 |
"iso-8859-13" | 28603 |
"iso-8859-15" | 28605 |
"iso-8859-2" | 28592 |
"iso-8859-3" | 28593 |
"iso-8859-4" | 28594 |
"iso-8859-5" | 28595 |
"iso-8859-6" | 28596 |
"iso-8859-7" | 28597 |
"iso-8859-8" | 28598 |
"ISO-8859-8 Visual" | 28598 |
"iso-8859-8-i" | 38598 |
"iso-8859-9" | 28599 |
"iso-ir-100" | 28591 |
"iso-ir-101" | 28592 |
"iso-ir-109" | 28593 |
"iso-ir-110" | 28594 |
"iso-ir-126" | 28597 |
"iso-ir-127" | 28596 |
"iso-ir-138" | 28598 |
"iso-ir-144" | 28595 |
"iso-ir-148" | 28599 |
"iso-ir-149" | 949 |
"iso-ir-58" | 936 |
"iso-ir-6" | 20127 |
"ISO646-US" | 20127 |
"iso8859-1" | 28591 |
"iso8859-2" | 28592 |
"ISO_646.irv:1991" | 20127 |
"iso_8859-1" | 28591 |
"ISO_8859-15" | 28605 |
"iso_8859-1:1987" | 28591 |
"iso_8859-2" | 28592 |
"iso_8859-2:1987" | 28592 |
"ISO_8859-3" | 28593 |
"ISO_8859-3:1988" | 28593 |
"ISO_8859-4" | 28594 |
"ISO_8859-4:1988" | 28594 |
"ISO_8859-5" | 28595 |
"ISO_8859-5:1988" | 28595 |
"ISO_8859-6" | 28596 |
"ISO_8859-6:1987" | 28596 |
"ISO_8859-7" | 28597 |
"ISO_8859-7:1987" | 28597 |
"ISO_8859-8" | 28598 |
"ISO_8859-8:1988" | 28598 |
"ISO_8859-9" | 28599 |
"ISO_8859-9:1989" | 28599 |
"Johab" | 1361 |
"koi" | 20866 |
"koi8" | 20866 |
"koi8-r" | 20866 |
"koi8-ru" | 21866 |
"koi8-u" | 21866 |
"koi8r" | 20866 |
"korean" | 949 |
"ks-c-5601" | 949 |
"ks-c5601" | 949 |
"KSC5601" | 949 |
"KSC_5601" | 949 |
"ks_c_5601" | 949 |
"ks_c_5601-1987" | 949 |
"ks_c_5601-1989" | 949 |
"ks_c_5601_1987" | 949 |
"l1" | 28591 |
"l2" | 28592 |
"l3" | 28593 |
"l4" | 28594 |
"l5" | 28599 |
"l9" | 28605 |
"latin1" | 28591 |
"latin2" | 28592 |
"latin3" | 28593 |
"latin4" | 28594 |
"latin5" | 28599 |
"latin9" | 28605 |
"logical" | 28598 |
"macintosh" | 10000 |
"ms_Kanji" | 932 |
"Norwegian" | 20108 |
"NS_4551-1" | 20108 |
"PC-Multilingual-850+euro" | 858 |
"SEN_850200_B" | 20107 |
"shift-jis" | 932 |
"shift_jis" | 932 |
"sjis" | 932 |
"Swedish" | 20107 |
"TIS-620" | 874 |
"ucs-2" | 1200 |
"unicode" | 1200 |
"unicode-1-1-utf-7" | 65000 |
"unicode-1-1-utf-8" | 65001 |
"unicode-2-0-utf-7" | 65000 |
"unicode-2-0-utf-8" | 65001 |
"unicodeFFFE" | 1201 |
"us" | 20127 |
"us-ascii" | 20127 |
"utf-16" | 1200 |
"UTF-16BE" | 1201 |
"UTF-16LE" | 1200 |
"utf-32" | 12000 |
"UTF-32BE" | 12001 |
"UTF-32LE" | 12000 |
"utf-7" | 65000 |
"utf-8" | 65001 |
"visual" | 28598 |
"windows-1250" | 1250 |
"windows-1251" | 1251 |
"windows-1252" | 1252 |
"windows-1253" | 1253 |
"Windows-1254" | 1254 |
"windows-1255" | 1255 |
"windows-1256" | 1256 |
"windows-1257" | 1257 |
"windows-1258" | 1258 |
"windows-874" | 874 |
"x-ansi" | 1252 |
"x-Chinese-CNS" | 20000 |
"x-Chinese-Eten" | 20002 |
"x-cp1250" | 1250 |
"x-cp1251" | 1251 |
"x-cp20001" | 20001 |
"x-cp20003" | 20003 |
"x-cp20004" | 20004 |
"x-cp20005" | 20005 |
"x-cp20261" | 20261 |
"x-cp20269" | 20269 |
"x-cp20936" | 20936 |
"x-cp20949" | 20949 |
"x-cp50227" | 50227 |
"X-EBCDIC-KoreanExtended" | 20833 |
"x-euc" | 51932 |
"x-euc-cn" | 51936 |
"x-euc-jp" | 51932 |
"x-Europa" | 29001 |
"x-IA5" | 20105 |
"x-IA5-German" | 20106 |
"x-IA5-Norwegian" | 20108 |
"x-IA5-Swedish" | 20107 |
"x-iscii-as" | 57006 |
"x-iscii-be" | 57003 |
"x-iscii-de" | 57002 |
"x-iscii-gu" | 57010 |
"x-iscii-ka" | 57008 |
"x-iscii-ma" | 57009 |
"x-iscii-or" | 57007 |
"x-iscii-pa" | 57011 |
"x-iscii-ta" | 57004 |
"x-iscii-te" | 57005 |
"x-mac-arabic" | 10004 |
"x-mac-ce" | 10029 |
"x-mac-chinesesimp" | 10008 |
"x-mac-chinesetrad" | 10002 |
"x-mac-croatian" | 10082 |
"x-mac-cyrillic" | 10007 |
"x-mac-greek" | 10006 |
"x-mac-hebrew" | 10005 |
"x-mac-icelandic" | 10079 |
"x-mac-japanese" | 10001 |
"x-mac-korean" | 10003 |
"x-mac-romanian" | 10010 |
"x-mac-thai" | 10021 |
"x-mac-turkish" | 10081 |
"x-mac-ukrainian" | 10017 |
"x-ms-cp932" | 932 |
"x-sjis" | 932 |
"x-unicode-1-1-utf-7" | 65000 |
"x-unicode-1-1-utf-8" | 65001 |
"x-unicode-2-0-utf-7" | 65000 |
"x-unicode-2-0-utf-8" | 65001 |
"x-x-big5" | 950 |
There's one really egregious name here: unicodeFFFE is actually big-endian UTF-16. The name looks like the byte order mark (BOM) for UTF-16BE written in little-endian order. Try to use UTF-16BE instead :)
Note that historically lots of data on the web has been mis-tagged or isn't tagged at all. Data from Windows machines is often in the Windows system code page, such as windows-1252. So sometimes browsers may attempt to use the current system code page, or try to guess (with varying degrees of success) the actual code page. Additionally, there are differences between different vendors' code page behavior, causing further ambiguity.
See also: