/// <copyright company="JetBrains">
/// Copyright © 2003-2008 JetBrains s.r.o.
/// You may distribute under the terms of the GNU General Public License, as published by the Free Software Foundation, version 2 (see License.txt in the repository root folder).
/// </copyright>

using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;

using JetBrains.Annotations;
using JetBrains.Interop.WinApi;
using JetBrains.Omea.OpenAPI;

using Microsoft.Win32;

namespace JetBrains.Omea.Charsets
{
	/// <summary>
	/// Options that can be passed to the <see cref="CharsetsEnum"/> constructor.
	/// </summary>
	[Flags]
	public enum CharsetFlags
	{
		/// <summary>
		/// No options.
		/// </summary>
		None = 0,

		/// <summary>
		/// NOTE(review): presumably restricts the enumeration to charsets suitable for NNTP;
		/// the flag is stored by <see cref="CharsetsEnum"/> but not consulted by the code in this file — confirm against callers.
		/// </summary>
		NntpCharset = 1
	}

	/// <summary>
	/// Enumerates charsets stored in registry and applicable to internet mail, news or web pages.
	/// </summary>
	/// <remarks>
	/// Enumeration yields only real charsets; Registry entries that are aliases to other charsets are skipped.
	/// </remarks>
	public class CharsetsEnum : IEnumerable<CharsetsEnum.Charset>
	{
		#region Data

		/// <summary>
		/// Name of the Registry value that, for an alias charset, gets the name of the destination charset.
		/// </summary>
		protected static readonly string AliasForCharsetValue = "AliasForCharset";

		/// <summary>
		/// A Registry key with the list of charsets, by their names, with links to the appropriate codepages; and aliases to the charsets.
		/// </summary>
		protected static readonly string KeyCharsets = @"MIME\Database\Charset";

		/// <summary>
		/// A Registry key with the list of codepages, to which we jump from the charset to retrieve the user-friendly name of the encoding.
		/// </summary>
		protected static readonly string KeyCodepages = @"MIME\Database\Codepage";

		/// <summary>
		/// Name of the Registry value that gets the user-friendly description of a code page.
		/// </summary>
		protected static readonly string ValueDescription = "Description";

		/// <summary>
		/// Name of the Registry value that gives the codepage family.
		/// </summary>
		protected static readonly string ValueFamily = "Family";

		/// <summary>
		/// Name of the Registry value that gets the codepage associated with a charset.
		/// </summary>
		protected static readonly string ValueInternetEncoding = "InternetEncoding";

		/// <summary>
		/// Flags this enumeration was created with. Stored as given; not read by the code in this file.
		/// </summary>
		protected CharsetFlags _flags;

		#endregion

		#region Init

		/// <summary>
		/// Creates the enumeration with <see cref="CharsetFlags.None"/>.
		/// </summary>
		public CharsetsEnum()
			: this(CharsetFlags.None)
		{
		}

		/// <summary>
		/// Creates the enumeration with the given flags.
		/// </summary>
		/// <param name="flags">Flags to store with this enumeration.</param>
		public CharsetsEnum(CharsetFlags flags)
		{
			_flags = flags;
		}

		#endregion

		#region Operations

		/// <summary>
		/// Looks up a charset by its name. Throws an exception on failure.
		/// </summary>
		/// <param name="name">Name of the charset to look up.</param>
		/// <returns>The charset identified by the given charset name.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="name"/> is Null.</exception>
		/// <exception cref="InvalidOperationException">There is no Registry information for such a charset.</exception>
		public static Charset GetCharset(string name)
		{
			Charset retval = Charset.TryGetCharset(name);
			if(ReferenceEquals(retval, null))
				throw new InvalidOperationException(String.Format("Unable to create the charset object. There is no information available in the registry for the “{0}” charset.", name));
			return retval;
		}

		/// <summary>
		/// Returns the object for the charset that is registered as the system's default for message bodies.
		/// Note that this does not necessarily correspond to the system's ANSI code page, eg for Cyrillic systems it will be “koi8-r” whilst having the default ANSI codepage set to “windows-1251”.
		/// </summary>
		/// <returns>The default body charset.</returns>
		public static Charset GetDefaultBodyCharset()
		{
			return GetCharset(Encoding.Default.BodyName);
		}

		/// <summary>
		/// Returns the object for the charset that is registered as the system's default for Web and local use.
		/// Unlike <see cref="GetDefaultBodyCharset"/>, usually corresponds to the system's ANSI codepage.
		/// </summary>
		/// <returns>The default web charset.</returns>
		public static Charset GetDefaultWebCharset()
		{
			return GetCharset(Encoding.Default.WebName);
		}

		/// <summary>
		/// Looks up a charset by its name. Returns Null on failure and does not throw an exception.
		/// </summary>
		/// <param name="name">Name of the charset to look up.</param>
		/// <returns>The charset identified by the given charset name,
		/// or a Null value if such a charset is not registered in the system.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="name"/> is Null.</exception>
		public static Charset TryGetCharset(string name)
		{
			return Charset.TryGetCharset(name);
		}

		#endregion

		#region IEnumerable Members

		/// <summary>
		/// Returns an enumerator that iterates through the registered charsets, skipping the aliases.
		/// </summary>
		/// <returns>
		/// An enumerator that can be used to iterate through the collection.
		/// </returns>
		public IEnumerator<Charset> GetEnumerator()
		{
			// Snapshot the charset names (subkeys) and release the Registry handle right away,
			// so that it is not held open for as long as the caller keeps the enumerator alive
			string[] arCharsetNames;
			using(RegistryKey key = Registry.ClassesRoot.OpenSubKey(KeyCharsets, false))
				arCharsetNames = key != null ? key.GetSubKeyNames() : new string[] {};

			foreach(string sCharset in arCharsetNames)
			{
				Charset current = Charset.TryGetCharset(sCharset);

				// Name differs from the subkey name if the subkey was an alias; such ones we skip
				if(!ReferenceEquals(current, null) && (current.Name == sCharset))
					yield return current;
			}
		}

		/// <summary>
		/// Returns an enumerator that iterates through a collection.
		/// </summary>
		/// <returns>
		/// An <see cref="IEnumerator"/> object that can be used to iterate through the collection.
		/// </returns>
		IEnumerator IEnumerable.GetEnumerator()
		{
			return GetEnumerator();
		}

		#endregion

		#region Charset Type

		/// <summary>
		/// Represents a single charset in the charsets enumeration.
		/// </summary>
		/// <remarks>
		/// The comparer sorts charsets first by the family, then lexicographically by the description within a family.
		/// Equality is defined by the charset <see cref="Name"/> (ordinal, case-sensitive).
		/// </remarks>
		public class Charset : IComparable
		{
			#region Data

			/// <summary>
			/// Upper bound on the number of alias hops followed when resolving a charset name.
			/// Protects against self-referencing or cyclic alias records in the Registry.
			/// </summary>
			private const int MaxAliasDepth = 16;

			private const string RegexDll = "Dll";

			private const string RegexResId = "ResId";

			/// <summary>
			/// Regex that matches charset descriptions referencing an external resource in a DLL.
			/// Example: “@%SystemRoot%\system32\mlang.dll,-4643” (for utf-8).
			/// </summary>
			[NotNull]
			protected static readonly Regex _regexExternalDescription = new Regex(string.Format(@"^@(?<{0}>.+),(?<{1}>-?\d+)", RegexDll, RegexResId), RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase | RegexOptions.Singleline);

			/// <summary>
			/// Codepage identifier for this charset.
			/// It is a codepage for which this charset is the InternetEncoding.
			/// </summary>
			protected readonly int _codepage;

			/// <summary>
			/// User-friendly description for this charset.
			/// </summary>
			protected readonly string _description;

			/// <summary>
			/// Internet string ID for the charset.
			/// </summary>
			protected readonly string _name;

			/// <summary>
			/// Some codepages are gathered into a family, eg all the Cyrillic encodings, all the Unicodes, and so on.
			/// For such a family, this is a codepage for the main charset in the family.
			/// For those charsets that don't have a family, it's merely the same value as <see cref="_codepage"/>.
			/// If <see cref="_nFamilyCodepage"/> is equal to <see cref="_codepage"/>,
			/// then it's either the main charset of the family or a standalone charset without a family.
			/// </summary>
			protected readonly int _nFamilyCodepage;

			#endregion

			#region Init

			/// <summary>
			/// Constructs the charset object, specifying all of its parameters explicitly.
			/// For internal use only. Call <see cref="TryGetCharset"/> instead.
			/// </summary>
			/// <param name="sName">Name of the charset (eg “Windows-1251”).</param>
			/// <param name="nCodepage">Personal codepage of this charset (eg 1251 for “windows-1251” but 20866 for “koi8-r”).</param>
			/// <param name="sDescription">User-friendly description of the encoding (eg “Cyrillic (Windows)”).</param>
			/// <param name="nFamilyCodepage">Family codepage for those charsets that have a family, or self codepage for the rest.</param>
			internal Charset(string sName, int nCodepage, string sDescription, int nFamilyCodepage)
			{
				_name = sName;
				_description = sDescription;
				_codepage = nCodepage;
				_nFamilyCodepage = nFamilyCodepage;
			}

			#endregion

			#region Attributes

			/// <summary>
			/// Gets the numeric codepage identifier for this charset.
			/// It is a codepage for which this charset is the InternetEncoding.
			/// </summary>
			public int Codepage
			{
				get { return _codepage; }
			}

			/// <summary>
			/// Gets the user-friendly description for this charset.
			/// </summary>
			public string Description
			{
				get { return _description; }
			}

			/// <summary>
			/// Some codepages are gathered into a family, eg all the Cyrillic encodings, all the Unicodes, and so on.
			/// For such a family, this is a codepage for the main charset in the family.
			/// For those charsets that don't have a family, it's merely the same value as <see cref="Codepage"/>.
			/// If <see cref="FamilyCodepage"/> is equal to <see cref="Codepage"/>,
			/// then it's either the main charset of the family or a standalone charset without a family.
			/// </summary>
			public int FamilyCodepage
			{
				get { return _nFamilyCodepage; }
			}

			/// <summary>
			/// Gets whether the charset is the default for message bodies for this system/user.
			/// The name is compared case-insensitively, as the lookup that produces <see cref="Name"/> is case-insensitive too.
			/// </summary>
			public bool IsDefaultBodyCharset
			{
				get { return string.Equals(_name, Encoding.Default.BodyName, StringComparison.OrdinalIgnoreCase); }
			}

			/// <summary>
			/// Gets whether the charset is the default for Web and local use for this system/user.
			/// The name is compared case-insensitively, as the lookup that produces <see cref="Name"/> is case-insensitive too.
			/// </summary>
			public bool IsDefaultWebCharset
			{
				get { return string.Equals(_name, Encoding.Default.WebName, StringComparison.OrdinalIgnoreCase); }
			}

			/// <summary>
			/// Gets the Internet string ID for the charset.
			/// </summary>
			public string Name
			{
				get { return _name; }
			}

			#endregion

			#region Operations

			/// <summary>
			/// Constructs and returns a charset object for the given charset name,
			/// or a Null value if such a charset is not registered in the system.
			/// Note that the <see cref="Name"/> property of the resulting charset may differ from the
			/// <paramref name="sCharset"/> parameter value, in case the latter specifies an alias to a charset.
			/// </summary>
			/// <param name="sCharset">Name of the charset to construct. Case-insensitive.</param>
			/// <returns>A <see cref="Charset"/> instance, in case such a charset exists; Null if an error has occurred.</returns>
			/// <exception cref="ArgumentNullException"><paramref name="sCharset"/> is Null.</exception>
			public static Charset TryGetCharset(string sCharset)
			{
				#region Preconditions

				if(sCharset == null)
					throw new ArgumentNullException("sCharset", "Input charset string is NULL.");

				#endregion Preconditions

				return ResolveCharset(sCharset, 0);
			}

			#endregion

			#region Implementation

			/// <summary>
			/// Reads the charset record from the Registry, following the alias links up to <see cref="MaxAliasDepth"/> hops.
			/// </summary>
			/// <param name="sCharset">Name of the charset (or alias) to resolve. Must not be Null.</param>
			/// <param name="nAliasDepth">Number of alias hops already made to get to <paramref name="sCharset"/>.</param>
			/// <returns>The charset, or Null if there is no usable Registry record or the alias chain is too long.</returns>
			private static Charset ResolveCharset(string sCharset, int nAliasDepth)
			{
				// A cyclic alias chain would otherwise recurse until the stack overflows, which cannot be caught
				if(nAliasDepth > MaxAliasDepth)
					return null;

				// Get the Codepage associated with this charset
				int nCodepage;
				RegistryKey keyCharset = Registry.ClassesRoot.OpenSubKey(KeyCharsets + '\\' + sCharset, false);
				if(keyCharset == null)
					return null;
				using(keyCharset)
				{
					// Alias?
					string sAlias = keyCharset.GetValue(AliasForCharsetValue) as string;
					if(sAlias != null)
						return ResolveCharset(sAlias, nAliasDepth + 1); // Resolve the alias

					// Get the integer codepage
					nCodepage = ReadInt32Value(keyCharset, ValueInternetEncoding, -1);
					if(nCodepage == -1)
						return null; // Neither an alias, nor a normal charset %-/
				}

				// Open the codepage key and retrieve the description and family info
				string sDescription = null;
				int nFamily = -1;
				RegistryKey keyCodepage = Registry.ClassesRoot.OpenSubKey(KeyCodepages + '\\' + nCodepage.ToString(CultureInfo.InvariantCulture), false);
				if(keyCodepage != null) // There's such an encoding
				{
					using(keyCodepage)
					{
						sDescription = keyCodepage.GetValue(ValueDescription) as string;
						nFamily = ReadInt32Value(keyCodepage, ValueFamily, -1);
					}
				}

				// Supply with the default values
				if(sDescription == null) // Either no such codepage-key or no description value under it
					sDescription = "Unknown (" + sCharset + ")";
				else
					sDescription = TryParseDescriptionFromResources(sDescription, sCharset);
				if(nFamily == -1)
					nFamily = nCodepage; // No family => either a family root, or not a member of a family; use self codepage in both cases

				// Fill in the charset data
				return new Charset(sCharset, nCodepage, sDescription, nFamily);
			}

			/// <summary>
			/// Reads an integer Registry value, falling back to the default if the value is missing or is not an integer.
			/// </summary>
			/// <param name="key">An open Registry key to read from.</param>
			/// <param name="sValueName">Name of the value under the <paramref name="key"/>.</param>
			/// <param name="nDefault">What to return if there is no such integer value.</param>
			/// <returns>The stored integer, or <paramref name="nDefault"/>.</returns>
			private static int ReadInt32Value(RegistryKey key, string sValueName, int nDefault)
			{
				// A direct cast would throw on a value of an unexpected type (eg a string); treat such as missing
				object oValue = key.GetValue(sValueName);
				return oValue is int ? (int)oValue : nDefault;
			}

			/// <summary>
			/// Some charset descriptions are stored in DLL resources, starting with WinNT 6.
			/// If such is the case, rip out the description.
			/// </summary>
			/// <param name="description">The description that might be a resource reference.</param>
			/// <param name="sCharset">Charset name, to use as a backup in case the <paramref name="description"/> points to a missing resource.</param>
			/// <returns>Either the original description, or the results of an attempt to resolve the resource reference.</returns>
			[NotNull]
			private static string TryParseDescriptionFromResources([NotNull] string description, [NotNull] string sCharset)
			{
				if(description == null)
					throw new ArgumentNullException("description");
				if(sCharset == null)
					throw new ArgumentNullException("sCharset");

				try
				{
					// Is a reference?
					Match match = _regexExternalDescription.Match(description);
					if(!match.Success)
						return description;

					// Reference parts
					string sDll = match.Groups[RegexDll].Value;
					string sResId = match.Groups[RegexResId].Value;

					// The ID "AS IS" may be negative, but negative values are not allowed
					// We'll try bitwise-turning it into UINT, and taking an ABS from it
					// The Registry data is not localized, so parse it culture-independently
					int nResId = int.Parse(sResId, NumberStyles.AllowLeadingSign, CultureInfo.InvariantCulture);

					// Try loading
					foreach(uint uid in new[] {unchecked((uint)nResId), unchecked((uint)-nResId)})
					{
						string resourcestring = User32Dll.Helpers.TryLoadStringResource(sDll, uid);
						if(resourcestring != null)
							return resourcestring;
					}
				}
				catch(Exception ex)
				{
					// Best-effort: a broken description must not prevent the charset from being listed
					Core.ReportException(ex, false);
				}

				// A reference that could not be resolved: fall back to the charset name
				return sCharset;
			}

			#endregion

			#region Overrides

			/// <summary>
			/// Compare charsets by their ID.
			/// </summary>
			public override bool Equals(object obj)
			{
				var other = obj as Charset;
				return !ReferenceEquals(other, null) && Name.Equals(other.Name);
			}

			/// <summary>
			/// Hashes the charset ID.
			/// </summary>
			public override int GetHashCode()
			{
				return Name.GetHashCode();
			}

			/// <summary>
			/// Hands out the charset name / ID.
			/// </summary>
			public override string ToString()
			{
				return Name;
			}

			#endregion

			#region Operators

			/// <summary>
			/// Two charsets are equal if both are Null, or both are non-Null and have the same <see cref="Name"/>.
			/// </summary>
			public static bool operator ==(Charset left, Charset right)
			{
				if(ReferenceEquals(left, null))
					return ReferenceEquals(right, null);
				if(ReferenceEquals(right, null))
					return false;
				return left.Name == right.Name;
			}

			/// <summary>
			/// The exact negation of the equality operator.
			/// </summary>
			public static bool operator !=(Charset left, Charset right)
			{
				return !(left == right);
			}

			#endregion

			#region IComparable Members

			/// <summary>
			/// The comparer sorts charsets first by the family, then lexicographically by the description within a family.
			/// Charsets that still tie are ordered by their names, so that a zero result means the charsets are equal.
			/// </summary>
			/// <param name="obj">The charset to compare to.</param>
			/// <returns>Negative, zero or positive value, as per the <see cref="IComparable"/> contract.</returns>
			/// <exception cref="ArgumentNullException"><paramref name="obj"/> is Null or is not a <see cref="Charset"/>.</exception>
			public int CompareTo(object obj)
			{
				var other = obj as Charset;
				if(ReferenceEquals(other, null))
					throw new ArgumentNullException("obj", "The object to compare with must be a non-null Charset.");

				// If the charsets have equal IDs, don't perform further comparisons.
				// Ordinal, to agree with Equals
				if(string.Equals(Name, other.Name))
					return 0;

				// Level 1: Family Codepage
				int diff = FamilyCodepage.CompareTo(other.FamilyCodepage);
				if(diff != 0)
					return diff;

				// Level 2: Description, as shown to the user
				diff = Description.CompareTo(other.Description);
				if(diff != 0)
					return diff;

				// Level 3: distinct charsets must not compare as equal
				return string.CompareOrdinal(Name, other.Name);
			}

			#endregion
		}

		#endregion
	}
}