///
/// Copyright © 2003-2008 JetBrains s.r.o.
/// You may distribute under the terms of the GNU General Public License, as published by the Free Software Foundation, version 2 (see License.txt in the repository root folder).
///
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using JetBrains.Annotations;
using JetBrains.Interop.WinApi;
using JetBrains.Omea.OpenAPI;
using Microsoft.Win32;
namespace JetBrains.Omea.Charsets
{
/// <summary>
/// Bit flags accepted by the <see cref="CharsetsEnum"/> constructor.
/// </summary>
[Flags]
public enum CharsetFlags
{
    /// <summary>
    /// No flags are set.
    /// </summary>
    None = 0x0,

    /// <summary>
    /// NOTE(review): presumably restricts the enumeration to charsets usable for NNTP (news);
    /// nothing in this file reads the flag — confirm against callers.
    /// </summary>
    NntpCharset = 0x1
}
/// <summary>
/// Enumerates charsets stored in the Registry and applicable to internet mail, news or web pages.
/// </summary>
public class CharsetsEnum : IEnumerable
{
#region Data
/// <summary>
/// Name of the Registry value that, for an alias charset, gets the name of the destination charset.
/// </summary>
protected static readonly string AliasForCharsetValue = "AliasForCharset";
/// <summary>
/// A Registry key (opened relative to HKEY_CLASSES_ROOT) with the list of charsets, by their names,
/// with links to the appropriate codepages; and aliases to the charsets.
/// </summary>
protected static readonly string KeyCharsets = @"MIME\Database\Charset";
/// <summary>
/// A Registry key (opened relative to HKEY_CLASSES_ROOT) with the list of codepages,
/// to which we jump from the charset to retrieve the user-friendly name of the encoding.
/// </summary>
protected static readonly string KeyCodepages = @"MIME\Database\Codepage";
/// <summary>
/// Name of the Registry value that gets the user-friendly description of a code page.
/// </summary>
protected static readonly string ValueDescription = "Description";
/// <summary>
/// Name of the Registry value that gives the codepage family.
/// </summary>
protected static readonly string ValueFamily = "Family";
/// <summary>
/// Name of the Registry value that gets the codepage associated with a charset.
/// </summary>
protected static readonly string ValueInternetEncoding = "InternetEncoding";
/// <summary>
/// Flags supplied to the constructor.
/// NOTE(review): stored but not read anywhere in the visible part of this file — confirm whether derived classes use it.
/// </summary>
protected CharsetFlags _flags;
#endregion
#region Init
/// <summary>
/// Creates the enumeration with no flags set (<see cref="CharsetFlags.None"/>).
/// </summary>
public CharsetsEnum()
    : this(CharsetFlags.None)
{
}
/// <summary>
/// Creates the enumeration and remembers the given flags.
/// </summary>
/// <param name="flags">Flags to store on this instance.</param>
public CharsetsEnum(CharsetFlags flags)
{
    this._flags = flags;
}
#endregion
#region Operations
/// <summary>
/// Looks up a charset by its name and fails with an exception when the lookup yields nothing.
/// </summary>
/// <param name="name">Name of the charset to look up.</param>
/// <returns>The charset identified by the given charset name.</returns>
/// <exception cref="InvalidOperationException">The charset lookup returned Null for <paramref name="name"/>.</exception>
public static Charset GetCharset(string name)
{
    Charset found = Charset.TryGetCharset(name);

    // Happy path first: the lookup produced a charset
    if(found != null)
        return found;

    string message = String.Format("Unable to create the charset object. There is no information available in the registry for the “{0}” charset.", name);
    throw new InvalidOperationException(message);
}
/// <summary>
/// Returns the object for the charset that is registered as the system's default for message bodies,
/// as named by the <c>BodyName</c> of <see cref="Encoding.Default"/>.
/// Note that it does not necessarily correspond to the system's ANSI code page, eg for Cyrillic systems
/// it will be “koi8-r” whilst having the default ANSI codepage set to “windows-1251”.
/// </summary>
/// <returns>The default body charset.</returns>
public static Charset GetDefaultBodyCharset()
{
    string bodyCharsetName = Encoding.Default.BodyName;
    return GetCharset(bodyCharsetName);
}
/// <summary>
/// Returns the object for the charset that is registered as the system's default for Web and local use,
/// as named by the <c>WebName</c> of <see cref="Encoding.Default"/>.
/// Unlike <see cref="GetDefaultBodyCharset"/>, usually corresponds to the system's ANSI codepage.
/// </summary>
/// <returns>The default web charset.</returns>
public static Charset GetDefaultWebCharset()
{
    string webCharsetName = Encoding.Default.WebName;
    return GetCharset(webCharsetName);
}
/// <summary>
/// Looks up a charset by its name. Returns Null when the charset is not found instead of throwing.
/// Simply forwards to the lookup on the <see cref="Charset"/> class.
/// </summary>
/// <param name="name">Name of the charset to look up.</param>
/// <returns>The charset identified by the given charset name,
/// or a Null value if such a charset is not registered in the system.</returns>
public static Charset TryGetCharset(string name)
{
    Charset lookedUp = Charset.TryGetCharset(name);
    return lookedUp;
}
#endregion
#region IEnumerable Members
/// <summary>
/// Returns an enumerator that iterates through the charsets listed under the charsets Registry key.
/// Alias entries are skipped, so each yielded charset carries the same name as its Registry subkey.
/// </summary>
/// <returns>An enumerator over the non-alias charsets; empty if the charsets key cannot be opened.</returns>
public IEnumerator GetEnumerator()
{
    // Snapshot the charset names (subkeys) and release the Registry handle right away,
    // so that it is not left for the finalizer while the caller walks the sequence.
    string[] arCharsetNames;
    using(RegistryKey key = Registry.ClassesRoot.OpenSubKey(KeyCharsets, false))
        arCharsetNames = key != null ? key.GetSubKeyNames() : new string[] {};

    // Jump to the next charset that is not an alias
    foreach(string sCharset in arCharsetNames)
    {
        Charset current = Charset.TryGetCharset(sCharset);
        if((current != null) && (current.Name == sCharset)) // Name may differ if it was an alias, such ones we skip
            yield return current; // Gotten the next value OK
    }
}
/// <summary>
/// Explicit <see cref="IEnumerable"/> implementation; hands out the same enumerator as the public
/// <c>GetEnumerator</c> method of this class.
/// </summary>
/// <returns>An object that can be used to iterate through the collection.</returns>
IEnumerator IEnumerable.GetEnumerator()
{
    IEnumerator enumerator = GetEnumerator();
    return enumerator;
}
#endregion
#region Charset Type
/// <summary>
/// Represents a single charset in the charsets enumeration.
/// </summary>
/// <remarks>The comparer sorts charsets first by the family, then lexicographically by the description within a family.</remarks>
public class Charset : IComparable
{
#region Data
// Name of the regex capture group that receives the DLL path of an external description reference.
private const string RegexDll = "Dll";
// Name of the regex capture group that receives the (possibly negative) numeric resource ID.
private const string RegexResId = "ResId";
/// <summary>
/// Regex that matches charset descriptions referencing an external resource in a DLL.
/// Example: “@%SystemRoot%\system32\mlang.dll,-4643” (for utf-8).
/// </summary>
[NotNull]
protected static readonly Regex _regexExternalDescription = new Regex(string.Format(@"^@(?<{0}>.+),(?<{1}>-?\d+)", RegexDll, RegexResId), RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase | RegexOptions.Singleline);
/// <summary>
/// Codepage identifier for this charset.
/// It is a codepage for which this charset is the InternetEncoding.
/// </summary>
protected readonly int _codepage;
/// <summary>
/// User-friendly description for this charset.
/// </summary>
protected readonly string _description;
/// <summary>
/// Internet string ID for the charset.
/// </summary>
protected readonly string _name;
/// <summary>
/// Some codepages are gathered into a family, eg all the Cyrillic encodings, all the Unicodes, and so on.
/// For such a family, this is a codepage for the main charset in the family.
/// For those charsets that don't have a family, it's merely the same value as <see cref="_codepage"/>.
/// If <see cref="_nFamilyCodepage"/> is equal to <see cref="_codepage"/>,
/// then it's either the main charset of the family or a standalone charset without a family.
/// </summary>
protected readonly int _nFamilyCodepage;
#endregion
#region Init
/// <summary>
/// Constructs the charset object, specifying all of its parameters explicitly.
/// For internal use only. Call <see cref="TryGetCharset"/> instead.
/// </summary>
/// <param name="sName">Name of the charset (eg “Windows-1251”).</param>
/// <param name="nCodepage">Personal codepage of this charset (eg 1251 for “windows-1251” but 20866 for “koi8-r”).</param>
/// <param name="sDescription">User-friendly description of the encoding (eg “Cyrillic (Windows)”).</param>
/// <param name="nFamilyCodepage">Family codepage for those charsets that have a family, or self codepage for the rest.</param>
internal Charset(string sName, int nCodepage, string sDescription, int nFamilyCodepage)
{
    // Identity
    this._name = sName;
    this._codepage = nCodepage;

    // Presentation and grouping
    this._description = sDescription;
    this._nFamilyCodepage = nFamilyCodepage;
}
#endregion
#region Attributes
/// <summary>
/// Gets the numeric codepage identifier for this charset,
/// that is, the codepage for which this charset is the InternetEncoding.
/// </summary>
public int Codepage
{
    get { return this._codepage; }
}
/// <summary>
/// Gets the user-friendly description text of this charset.
/// </summary>
public string Description
{
    get { return this._description; }
}
/// <summary>
/// Gets the codepage of the family this charset belongs to.
/// Some codepages are gathered into a family, eg all the Cyrillic encodings, all the Unicodes, and so on;
/// for such a family, this is the codepage of the main charset in the family.
/// For charsets without a family it is the same value as <see cref="Codepage"/>.
/// Thus, when <see cref="FamilyCodepage"/> equals <see cref="Codepage"/>, the charset is either
/// the main charset of its family or a standalone charset without a family.
/// </summary>
public int FamilyCodepage
{
    get { return this._nFamilyCodepage; }
}
/// <summary>
/// Gets whether this charset's name equals the <c>BodyName</c> of <see cref="Encoding.Default"/>,
/// ie whether it is the default message-body charset for this system/user.
/// </summary>
public bool IsDefaultBodyCharset
{
    get
    {
        string defaultBodyName = Encoding.Default.BodyName;
        return _name == defaultBodyName;
    }
}
/// <summary>
/// Gets whether this charset's name equals the <c>WebName</c> of <see cref="Encoding.Default"/>,
/// ie whether it is the default Web charset for this system/user.
/// </summary>
public bool IsDefaultWebCharset
{
    get
    {
        string defaultWebName = Encoding.Default.WebName;
        return _name == defaultWebName;
    }
}
/// <summary>
/// Gets the Internet string ID (the name) of this charset.
/// </summary>
public string Name
{
    get { return this._name; }
}
#endregion
#region Operations
/// <summary>
/// Constructs and returns a charset object for the given charset name,
/// or a Null value if such a charset is not registered in the system.
/// Note that the <see cref="Name"/> property of the resulting charset may differ from the
/// <paramref name="sCharset"/> parameter value, in case the latter specifies an alias to a charset.
/// </summary>
/// <param name="sCharset">Name of the charset to construct. It is used as a Registry subkey name
/// under the charsets key; the resulting <see cref="Name"/> is the string as passed in
/// (or as written in the alias value), not a normalized form.</param>
/// <returns>A <see cref="Charset"/> instance, in case such a charset exists; Null if the charset key is missing,
/// has neither an alias nor a usable integer codepage value, or its alias chain does not terminate.</returns>
/// <exception cref="ArgumentNullException"><paramref name="sCharset"/> is Null.</exception>
public static Charset TryGetCharset(string sCharset)
{
    #region Preconditions
    if(sCharset == null)
        throw new ArgumentNullException("sCharset", "Input charset string is NULL.");
    #endregion Preconditions

    // Upper bound on alias hops; protects against alias entries that point at each other (or at themselves),
    // which would otherwise never terminate.
    const int nMaxAliasHops = 16;

    // Resolve aliases iteratively, then get the Codepage associated with the final charset
    string sResolved = sCharset;
    int nCodepage = -1;
    for(int nHop = 0;; nHop++)
    {
        if(nHop > nMaxAliasHops)
            return null; // Cyclic or unreasonably long alias chain

        using(RegistryKey keyCharset = Registry.ClassesRoot.OpenSubKey(KeyCharsets + '\\' + sResolved, false))
        {
            if(keyCharset == null)
                return null; // No such charset

            // Alias?
            string sAlias = keyCharset.GetValue(AliasForCharsetValue) as string;
            if(sAlias != null)
            {
                sResolved = sAlias; // Follow the alias on the next iteration, with this key already closed
                continue;
            }

            // Get the integer codepage; a missing value or one of an unexpected Registry type counts as "not a charset"
            object oCodepage = keyCharset.GetValue(ValueInternetEncoding);
            if(!(oCodepage is int))
                return null;
            nCodepage = (int)oCodepage;
            if(nCodepage == -1)
                return null; // Neither an alias, nor a normal charset %-/
        }
        break;
    }

    // Open the codepage key and retrieve the description and family info
    string sDescription = null;
    int nFamily = -1;
    using(RegistryKey keyCodepage = Registry.ClassesRoot.OpenSubKey(KeyCodepages + '\\' + nCodepage, false))
    {
        if(keyCodepage != null) // There's such an encoding
        {
            sDescription = keyCodepage.GetValue(ValueDescription) as string;

            // Tolerate a family value of an unexpected Registry type: treat it as "no family"
            object oFamily = keyCodepage.GetValue(ValueFamily);
            if(oFamily is int)
                nFamily = (int)oFamily;
        }
    }

    // Supply with the default values
    if(sDescription == null) // Either no such codepage-key or no description value under it
        sDescription = "Unknown (" + sResolved + ")";
    else
        sDescription = TryParseDescriptionFromResources(sDescription, sResolved);
    if(nFamily == -1)
        nFamily = nCodepage; // No family => either a family root, or not a member of a family; use self codepage in both cases

    // Fill in the charset data
    return new Charset(sResolved, nCodepage, sDescription, nFamily);
}
#endregion
#region Implementation
/// <summary>
/// Some charset descriptions are stored in DLL resources, starting with WinNT 6.
/// If such is the case, rip out the description.
/// </summary>
/// <param name="description">The description that might be a resource reference.</param>
/// <param name="sCharset">Charset name, to use as a backup in case the <paramref name="description"/>
/// points to a missing resource or resolving it fails.</param>
/// <returns>The original description if it is not a resource reference; otherwise the loaded resource string,
/// or <paramref name="sCharset"/> if the resource could not be loaded.</returns>
/// <exception cref="ArgumentNullException">Either argument is Null.</exception>
[NotNull]
private static string TryParseDescriptionFromResources([NotNull] string description, [NotNull] string sCharset)
{
    if(description == null)
        throw new ArgumentNullException("description");
    if(sCharset == null)
        throw new ArgumentNullException("sCharset");
    try
    {
        // Is a reference?
        Match match = _regexExternalDescription.Match(description);
        if(!match.Success)
            return description;

        // Reference parts
        string sDll = match.Groups[RegexDll].Value;
        string sResId = match.Groups[RegexResId].Value;

        // The ID "AS IS" may be negative, but negative values are not allowed
        // We'll try bitwise-turning it into UINT, and taking an ABS from it
        // The text comes from the Registry, not from the user: parse it culture-invariantly,
        // so that a customized "negative sign" in the regional settings cannot break the leading '-'
        int nResId = int.Parse(sResId, System.Globalization.NumberStyles.AllowLeadingSign, System.Globalization.CultureInfo.InvariantCulture);

        // Try loading
        foreach(uint uid in new[] {unchecked((uint)nResId), unchecked((uint)-nResId)})
        {
            string resourcestring = User32Dll.Helpers.TryLoadStringResource(sDll, uid);
            if(resourcestring != null)
                return resourcestring;
        }
    }
    catch(Exception ex)
    {
        // Best effort: report the problem and fall back to the charset name below
        Core.ReportException(ex, false);
    }
    return sCharset;
}
#endregion
#region Overrides
/// <summary>
/// Charsets are equal when their names (IDs) are equal.
/// Anything that is not a <see cref="Charset"/>, including Null, is never equal to this instance.
/// </summary>
public override bool Equals(object obj)
{
    var that = obj as Charset;
    if(that == null)
        return false;
    return Name.Equals(that.Name);
}
/// <summary>
/// The hash code of a charset is the hash code of its name (ID).
/// </summary>
public override int GetHashCode()
{
    string id = Name;
    return id.GetHashCode();
}
/// <summary>
/// The string form of a charset is its name (ID).
/// </summary>
public override string ToString()
{
    string id = Name;
    return id;
}
#endregion
#region ERROR
/// <summary>
/// Two charsets are equal when both are Null, or when both are non-Null and have equal names.
/// </summary>
public static bool operator ==(Charset α, Charset β)
{
    bool leftIsNull = ReferenceEquals(α, null);
    bool rightIsNull = ReferenceEquals(β, null);

    // With at least one Null operand, equality holds only if both are Null
    if(leftIsNull || rightIsNull)
        return leftIsNull && rightIsNull;

    return α.Name == β.Name;
}
/// <summary>
/// Two charsets differ when exactly one of them is Null, or when both are non-Null and their names differ.
/// </summary>
public static bool operator !=(Charset ξ, Charset η)
{
    bool leftIsNull = ReferenceEquals(ξ, null);
    bool rightIsNull = ReferenceEquals(η, null);

    // With at least one Null operand, they differ unless both are Null
    if(leftIsNull || rightIsNull)
        return leftIsNull != rightIsNull;

    return ξ.Name != η.Name;
}
#endregion
#region IComparable Members
/// <summary>
/// The comparer sorts charsets first by the family, then lexicographically by the description within a family.
/// Charsets with equal names compare as equal; charsets that tie on both family and description
/// are ordered by name, so that zero is returned only for charsets that <see cref="Equals"/> treats as equal.
/// </summary>
/// <param name="obj">The charset to compare with, or Null.</param>
/// <returns>A negative value, zero, or a positive value if this charset sorts before, together with,
/// or after <paramref name="obj"/>. A Null <paramref name="obj"/> sorts before any charset.</returns>
/// <exception cref="ArgumentException"><paramref name="obj"/> is not Null and is not a <see cref="Charset"/>.</exception>
public int CompareTo(object obj)
{
    // IComparable contract: any instance compares greater than Null
    if(ReferenceEquals(obj, null))
        return 1;

    var other = obj as Charset;
    if(ReferenceEquals(other, null))
        throw new ArgumentException("Object must be of type Charset.", "obj");

    // If the charsets have equal IDs, don't perform further comparisons.
    // Ordinal equality, the same notion of identity as Equals and operator== use.
    if(string.Equals(Name, other.Name))
        return 0;

    int diff;

    // Level 1: Family Codepage
    if((diff = FamilyCodepage.CompareTo(other.FamilyCodepage)) != 0)
        return diff;

    // Level 2: Description, compared linguistically as it is user-visible text
    if((diff = string.Compare(Description, other.Description, StringComparison.CurrentCulture)) != 0)
        return diff;

    // Level 3: tie-break on the ID, which is known to differ at this point
    return string.CompareOrdinal(Name, other.Name);
}
#endregion
}
#endregion
}
}