///
/// Copyright © 2003-2008 JetBrains s.r.o.
/// You may distribute under the terms of the GNU General Public License, as published by the Free Software Foundation, version 2 (see License.txt in the repository root folder).
///
using System;
using System.Diagnostics;
using System.Drawing;
//-----------------------------------------------------------------------------
// Text processing API block
//-----------------------------------------------------------------------------
namespace JetBrains.Omea.OpenAPI
{
/// <summary>
/// Enumerates entry proximity levels: <c>Phrase</c> (1), <c>Sentence</c> (2)
/// and <c>Document</c> (3).
/// </summary>
/// <remarks>
/// NOTE(review): presumably the scope within which the terms of a query must
/// co-occur for a match - confirm against the query processor.
/// </remarks>
/// <since>2.3</since>
public enum EntryProximity { Phrase = 1, Sentence = 2, Document = 3 }
/// <summary>
/// DocumentSectionResource describes names of resource types and properties
/// necessary for referencing their resources in other components (like
/// AdvancedSearchForm).
/// </summary>
/// <since>2.1.3</since>
public class DocumentSectionResource
{
/// <summary>
/// The string <c>"DocumentSection"</c>.
/// NOTE(review): presumably the name of the resource type that represents a
/// document section - confirm against the resource store registration.
/// </summary>
public const string DocSectionResName = "DocumentSection";
/// <summary>
/// The string <c>"SectionHelpDescription"</c>.
/// NOTE(review): presumably the name of a property holding a help description
/// of a section - confirm against the resource store registration.
/// </summary>
public const string SectionHelpDescription = "SectionHelpDescription";
}
/// <summary>
/// Names the standard sections of a document and provides a helper that
/// narrows a list of search matches down to one of those sections.
/// </summary>
public class DocumentSection
{
    /// <summary>
    /// Corresponds to the whole body of the document; the search is not restricted at all.
    /// </summary>
    public const string BodySection = "All Sections";

    /// <summary>
    /// Corresponds to the title/subject of an email or the heading of a news article.
    /// </summary>
    public const string SubjectSection = "Subject/Heading";

    /// <summary>
    /// Corresponds to the annotation of the resource.
    /// </summary>
    public const string AnnotationSection = "Annotation";

    /// <summary>
    /// Corresponds to the textual representation of the authors of an email/article or an RSS feed.
    /// </summary>
    public const string SourceSection = "Source/From";

    /// <summary>
    /// Corresponds to the whole body of the document minus Subject/Title. This auxiliary
    /// section is used neither in text indexing nor in UI for restricting the search. Its
    /// only purpose is to exclude subject/heading offsets from the given set.
    /// </summary>
    public const string NonSubjectSections = "NonSubject";

    /// <summary>
    /// Restricts the search result (list of search matches) to a particular section.
    /// </summary>
    /// <param name="allResults">Search results to be restricted. May be null.</param>
    /// <param name="desiredSection">Section to which the results should be restricted.</param>
    /// <returns>
    /// The matches that belong to <paramref name="desiredSection"/>, in their original order;
    /// null if <paramref name="allResults"/> is null or no match belongs to the section.
    /// </returns>
    /// <since>2.0</since>
    public static WordPtr[] RestrictResults( WordPtr[] allResults, string desiredSection )
    {
        if( allResults == null )
        {
            return null;
        }

        // Pass 1: count the matches so the output array can be sized exactly.
        int matches = 0;
        for( int i = 0; i < allResults.Length; i++ )
        {
            if( isProperSection( allResults[ i ], desiredSection ) )
            {
                matches++;
            }
        }
        if( matches == 0 )
        {
            return null;
        }

        // Pass 2: copy the matching entries, preserving their order.
        WordPtr[] restricted = new WordPtr[ matches ];
        int next = 0;
        for( int i = 0; i < allResults.Length; i++ )
        {
            WordPtr candidate = allResults[ i ];
            if( !isProperSection( candidate, desiredSection ) )
            {
                continue;
            }
            Debug.Assert( next < matches );
            restricted[ next ] = candidate;
            next++;
        }
        return restricted;
    }

    /// <summary>
    /// Tells whether a match belongs to the requested section. A match qualifies when its
    /// section equals the requested one, or when <see cref="NonSubjectSections"/> is requested
    /// and the match lies in any section other than <see cref="SubjectSection"/>.
    /// </summary>
    private static bool isProperSection( WordPtr word, string section )
    {
        if( word.Section == section )
        {
            return true;
        }
        return section == NonSubjectSections && word.Section != SubjectSection;
    }
}
/// <summary>
/// Specifies possible purposes of requesting the text of a resource.
/// The purpose of the current request is exposed to text providers through
/// <see cref="IResourceTextConsumer.Purpose"/>.
/// </summary>
public enum TextRequestPurpose
{
/// <summary>
/// The text is requested for indexing. The complete text of the document should be returned.
/// </summary>
Indexing,
/// <summary>
/// The text is requested for showing the context of a search result. If extracting the text
/// takes a long time, the extraction should not be performed, and
/// <see cref="IResourceTextConsumer.RejectResult"/> should be called to reject the results
/// from other text providers.
/// </summary>
ContextExtraction
};
#region Struct WordPtr — A structure that represents an individual search results entry.
/// <summary>
/// A structure that represents an individual search results entry.
/// </summary>
public struct WordPtr
{
    /// <summary>
    /// Offset of this word from the beginning of the plain-text document representation
    /// that was supplied to the text consumer.
    /// NOTE(review): presumably <see cref="IResourceTextConsumer"/> - confirm.
    /// </summary>
    public int StartOffset;

    /// <summary>
    /// Document section in which this search result resides, for example, title, body, etc.
    /// </summary>
    public string Section;

    /// <summary>
    /// Document section Id in which this search result resides.
    /// </summary>
    public int SectionId;

    /// <summary>
    /// The search result as it occurs in the plain-text document representation.
    /// </summary>
    public string Text;

    /// <summary>
    /// Original wordform that was present in the query. Different textual
    /// representations of the same query token have this attribute equal.
    /// </summary>
    public string Original;

    /// <summary>
    /// A shared zero-length array of <see cref="WordPtr"/>.
    /// </summary>
    public static WordPtr[] Empty = new WordPtr[ 0 ];

    #region Error Checks
    /// <summary>
    /// Performs a runtime check on the WordPtr contents to ensure that the structure is valid.
    /// If not, throws an exception that explains what is wrong.
    /// </summary>
    /// <exception cref="WordPtrException">
    /// <see cref="StartOffset"/> is negative or equal to <see cref="int.MaxValue"/>, or any of
    /// <see cref="Section"/>, <see cref="Text"/>, <see cref="Original"/> is null or empty.
    /// </exception>
    /// <since>2.0</since>
    public void AssertValid()
    {
        if( (StartOffset < 0) || (StartOffset == int.MaxValue) )
        {
            throw new WordPtrException( "The StartOffset field of a WordPtr must be a non-negative finite value." );
        }
        if( (Section == null) || (Section.Length == 0) )
        {
            // The message previously misspelled the field name as "Secion".
            throw new WordPtrException( "The Section field of a WordPtr must be defined. See DocumentSection structure for the available values." );
        }
        // TODO: check the SectionID
        if( (Text == null) || (Text.Length == 0) )
        {
            throw new WordPtrException( "The Text field of a WordPtr must be defined." );
        }
        if( (Original == null) || (Original.Length == 0) )
        {
            throw new WordPtrException( "The Original field of a WordPtr must be defined." );
        }
    }

    /// <summary>
    /// Checks whether an array of WordPtrs is valid.
    /// A null value is assumed to be valid by default.
    /// See the instance <see cref="AssertValid()"/> method for the per-item checks.
    /// </summary>
    /// <param name="words">An array of words to be checked for validity.</param>
    /// <param name="inOneSection">If true, all the words in the array must belong to the same
    /// document section. If false, no cross-word checks for the value are performed.</param>
    /// <exception cref="WordPtrException">
    /// An individual item is invalid, or <paramref name="inOneSection"/> is true and the items
    /// do not all share one section. Item errors take precedence: every item is validated
    /// before the section error is reported.
    /// </exception>
    /// <since>2.0</since>
    public static void AssertValid( WordPtr[] words, bool inOneSection )
    {
        if( words == null )
        {
            return; // A valid case
        }

        // Validate every item; while doing so remember whether two neighbouring
        // items disagree on the section (only when that check was requested).
        string previousSection = null;
        bool sectionMismatch = false;
        foreach( WordPtr word in words )
        {
            word.AssertValid();
            if( inOneSection && (previousSection != null) && (previousSection != word.Section) )
            {
                sectionMismatch = true; // Never lowered once raised
            }
            previousSection = word.Section;
        }

        // Issue the section error only after all items passed their own checks
        if( sectionMismatch )
        {
            throw new WordPtrException( "All the WordPtrs in the array must belong to the same section." );
        }
    }

    /// <summary>
    /// An exception that is thrown by the <see cref="WordPtr"/> validity checks.
    /// </summary>
    /// <since>2.0</since>
    public class WordPtrException : Exception
    {
        /// <summary>
        /// Creates the exception with the given explanatory text.
        /// </summary>
        /// <param name="errortext">Text describing which validity rule was violated.</param>
        public WordPtrException( string errortext )
            : base( errortext )
        {
        }
    }
    #endregion
}
#endregion
/// <summary>
/// Interface describes the core text-indexing engine which consumes the text
/// fragments, tokenizes them and constructs index chunks.
/// </summary>
/// <remarks>
/// Fragments for a single document must be submitted consecutively,
/// otherwise the engine will decide that a new version of the document is
/// queued for indexing.
/// </remarks>
public interface IResourceTextConsumer
{
/// <summary>
/// Submit a header/subject/title fragment of a resource.
/// </summary>
/// <param name="resourceId">A resource Id from which the fragment is taken.</param>
/// <param name="text">Fragment text.</param>
void AddDocumentHeading( int resourceId, string text );
/// <summary>
/// Submit a fragment of a resource.
/// </summary>
/// <param name="resourceId">A resource Id from which the fragment is taken.</param>
/// <param name="text">Fragment text.</param>
void AddDocumentFragment( int resourceId, string text );
/// <summary>
/// Submit a fragment of a resource from a particular named section.
/// </summary>
/// <param name="resourceId">A resource Id from which the fragment is taken.</param>
/// <param name="text">Fragment text.</param>
/// <param name="sectionName">Name of a section from which the fragment is taken.</param>
void AddDocumentFragment( int resourceId, string text, string sectionName );
/// <summary>
/// Method specifies the amount to be added to the starting offset
/// of the next fragment.
/// </summary>
/// <param name="spacesAmount">Number by which the starting offset must be increased.
/// Must be positive.</param>
/// <since>2.0</since>
void IncrementOffset( int spacesAmount );
/// <summary>
/// Start counting the token offset from 0 for all subsequent fragments of
/// the same document.
/// </summary>
void RestartOffsetCounting();
/// <summary>
/// Do not account the fragments collected so far for the currently processed
/// document if not all fragments can be submitted for some particular reason.
/// </summary>
void RejectResult();
/// <summary>
/// Get the purpose of the current IResourceTextConsumer instance -
/// index construction or context construction.
/// </summary>
TextRequestPurpose Purpose { get; }
}
/// <summary>
/// Describes the offset of a highlighted section in a search result context.
/// </summary>
/// <since>2.0</since>
public class OffsetData
{
    /// <summary>
    /// The start of a highlighted section.
    /// </summary>
    public int Start;

    /// <summary>
    /// The length of a highlighted section.
    /// </summary>
    public int Length;

    /// <summary>
    /// Creates an offset data with the specified start and length.
    /// </summary>
    /// <param name="start">The start of a highlighted section.</param>
    /// <param name="len">The length of a highlighted section.</param>
    public OffsetData( int start, int len )
    {
        this.Start = start;
        this.Length = len;
    }
}
/// <summary>
/// Allows to receive search highlight and context data for specific resources.
/// </summary>
public interface IHighlightDataProvider
{
/// <summary>
/// Returns the search result highlighting data for the specified resource.
/// </summary>
/// <param name="res">The resource for which the highlighting data is retrieved.</param>
/// <param name="words">The returned array of search result records.</param>
/// <returns>true if highlighting data for the specified resource was found, false otherwise.</returns>
/// <remarks>
/// An implementation of this interface for a specific search is returned by
/// <see cref="ITextIndexManager.ProcessQuery(string, int[], out IHighlightDataProvider, out string[], out string)"/>.
/// </remarks>
bool GetHighlightData( IResource res, out WordPtr[] words );
/// <summary>
/// Requests asynchronous context retrieval for the specified list of resource IDs.
/// </summary>
/// <param name="resourceIDs">The list of resource IDs for which contexts are requested.</param>
/// <remarks>
/// The contexts are stored in a virtual property "Context" which is managed
/// by the property provider attached to the search result resource list.
/// </remarks>
void RequestContexts( int[] resourceIDs );
/// <summary>
/// Retrieves the context for the specified resource.
/// </summary>
/// <param name="res">The resource for which the context is retrieved.</param>
/// <returns>The context, or null if the context is not available.</returns>
/// <since>2.0</since>
string GetContext( IResource res );
/// <summary>
/// Return an array of highlighted tokens in the context string in the format:
/// { offset in context, highlight length }.
/// </summary>
/// <param name="res">The resource for which the information is retrieved.</param>
/// <returns>Array of highlight data pairs.</returns>
/// <since>2.0</since>
OffsetData[] GetContextHighlightData( IResource res );
}
/// <summary>
/// Describes the array of documents which are available for searching.
/// </summary>
public class DocsArrayArgs : EventArgs
{
    // Private snapshot of the IDs taken at construction time.
    private readonly int[] _docIds;

    /// <summary>
    /// Creates the arguments from a list of document IDs. The list is copied, so
    /// later changes to <paramref name="docs"/> are not reflected in this instance.
    /// </summary>
    /// <param name="docs">IDs of the documents; null is treated as an empty list.</param>
    public DocsArrayArgs( int[] docs )
    {
        if( docs == null )
        {
            _docIds = new int[ 0 ];
        }
        else
        {
            _docIds = (int[]) docs.Clone();
        }
    }

    /// <summary>
    /// Returns the array of IDs of resources which are available for searching.
    /// </summary>
    /// <returns>The array of document IDs held by this instance (never null; not copied on return).</returns>
    public int[] GetDocuments()
    {
        return _docIds;
    }
}
/// <summary>
/// Callback defines an event when a number of documents becomes available
/// for searching.
/// </summary>
/// <param name="sender">The object that raises the event.</param>
/// <param name="docIds">Carries the IDs of the documents; read them with
/// <see cref="DocsArrayArgs.GetDocuments"/>.</param>
public delegate void UpdateFinishedEventHandler( object sender, DocsArrayArgs docIds );
/// <summary>
/// Interface controls the submission of the documents to the text-index
/// processing - manage the queue of text-indexing jobs, handle the events
/// on different text index states, handle exceptional situations in the
/// text index structure (text index corruption).
/// </summary>
public interface ITextIndexManager
{
/// <summary>
/// Queue a resource for text-indexing.
/// </summary>
/// <param name="resourceId">Id of a resource.</param>
void QueryIndexing( int resourceId );
/// <summary>
/// Queue a deletion of a resource from the text index.
/// </summary>
/// <param name="resourceId">Id of a resource.</param>
void DeleteDocumentQueued( int resourceId );
/// <summary>
/// Delete current text index, build a new one from scratch. Usually this
/// method is used when some exceptional situation is met which causes
/// text index corruption.
/// </summary>
void RebuildIndex();
/// <summary>
/// Determines whether text index files are present.
/// </summary>
/// <returns>True if valid text index is present.</returns>
bool IsIndexPresent();
/// <summary>
/// Determines whether a particular document is indexed.
/// </summary>
/// <param name="resourceId">Id of a resource.</param>
/// <returns>True if the document text was indexed.</returns>
bool IsDocumentInIndex( int resourceId );
/// <summary>
/// Registers a callback which is called when new batch of documents has
/// been indexed and is available for searching.
/// </summary>
/// <param name="callback">The callback to register.</param>
void SetUpdateResultHandler( UpdateFinishedEventHandler callback );
/// <summary>
/// Return a list of resources, textual representation of which matches
/// the query.
/// </summary>
/// <param name="searchQuery">Query string.</param>
/// <returns>List of resources matching the query.</returns>
IResourceList ProcessQuery( string searchQuery );
/// <summary>
/// Return a list of resources, textual representation of which matches
/// the query. Additionally specify whether to start the process of
/// contexts extraction (extraction is done asynchronously).
/// </summary>
/// <param name="searchQuery">Query string.</param>
/// <param name="restrictByIds">List of resource Ids within which the search is to be performed.</param>
/// <param name="highlightDataProvider">Auxiliary structure to be passed
/// to the ResourceBrowser.</param>
/// <param name="stopList">List of stopwords found during parsing the query.</param>
/// <param name="parsingErrorMsg">Contains message describing the error occurred during parsing the query.</param>
/// <returns>List of resources matching the query.</returns>
/// <since>2.0</since>
IResourceList ProcessQuery( string searchQuery, int[] restrictByIds,
out IHighlightDataProvider highlightDataProvider,
out string[] stopList, out string parsingErrorMsg );
/// <summary>
/// Match a resource over the query using the TextQueriesOptimizationManager
/// class which interacts with FilterRegistry and FullTextIndexer on per-document
/// basis when they are to appear in the text index.
/// </summary>
/// <param name="query">Query string.</param>
/// <param name="res">The resource to match against the query.</param>
/// <returns>NOTE(review): presumably true if the resource matches the query - confirm against the implementation.</returns>
/// <since>2.3 (2.5) (Grenache)</since>
bool MatchQuery( string query, IResource res );
/// <summary>
/// Get or set whether text indexing operations are carried out during
/// periods when the computer is in the idle mode.
/// </summary>
bool IdleIndexingMode { get; set; }
/// <summary>
/// Event is fired when text index construction is complete, that is when
/// there has been constructed at least one chunk of index over which the
/// search is possible.
/// </summary>
event EventHandler IndexLoaded;
//---------------------------------------------------------------------
// Search providers management
// NOTE(review): the members below were undocumented; the descriptions are
// derived from their signatures only and should be confirmed against the
// implementation.
//---------------------------------------------------------------------
/// <summary>
/// Registers a search provider together with a title.
/// </summary>
/// <param name="host">The search provider to register.</param>
/// <param name="title">The title associated with the provider.</param>
void RegisterSearchProvider( ISearchProvider host, string title );
/// <summary>
/// Registers a search provider together with a title and a group name.
/// </summary>
/// <param name="host">The search provider to register.</param>
/// <param name="title">The title associated with the provider.</param>
/// <param name="groupName">The name of the group associated with the provider.</param>
void RegisterSearchProvider( ISearchProvider host, string title, string groupName );
/// <summary>
/// Unregisters a search provider.
/// </summary>
/// <param name="host">The search provider to unregister.</param>
void UnregisterSearchProvider( ISearchProvider host );
/// <summary>
/// Gets the title of a search provider.
/// NOTE(review): presumably the title supplied at registration - confirm.
/// </summary>
/// <param name="host">The search provider whose title is requested.</param>
string GetSearchProviderTitle ( ISearchProvider host );
/// <summary>
/// Gets or sets the current search provider.
/// </summary>
ISearchProvider CurrentSearchProvider { get; set; }
/// <summary>
/// Gets an array of search providers.
/// NOTE(review): presumably all registered providers - confirm.
/// </summary>
ISearchProvider[] GetSearchProviders();
/// <summary>
/// Gets the names of search provider groups.
/// </summary>
string[] GetSearchProviderGroups();
/// <summary>
/// Gets the search providers that belong to the given group.
/// </summary>
/// <param name="group">Name of the group.</param>
ISearchProvider[] GetSearchProvidersInGroup( string group );
}
/// <summary>
/// Allows to register specially formatted phrases which, being added to the
/// end of the search query, allow (after parsing) to restrict the search
/// result without explicit usage of Advanced Search capabilities.
/// </summary>
/// <since>2.2</since>
public interface ISearchQueryExtensions
{
/// <summary>
/// Register phrase "anchor displayType" which (after parsing) restricts
/// the search result to resources of "resType" type.
/// Example: RegisterResourceTypeRestriction( "in", "news", "Article" )
/// "... in news" - restricts search result to news articles.
/// </summary>
/// <param name="anchor">The anchor word that starts the phrase ("in" in the example).</param>
/// <param name="displayType">The token that follows the anchor in the query ("news" in the example).</param>
/// <param name="resType">The resource type the result is restricted to ("Article" in the example).</param>
void RegisterResourceTypeRestriction( string anchor, string displayType, string resType );
/// <summary>
/// Register phrase "anchor token" which (after parsing) restricts
/// the search result to resources conforming to "stdCondition" condition.
/// Example: RegisterSingleTokenRestriction( "in", "unread", conditionResource )
/// "... in unread" - restricts search result to those which are
/// not read yet.
/// </summary>
/// <param name="anchor">The anchor word that starts the phrase ("in" in the example).</param>
/// <param name="token">The token that follows the anchor in the query ("unread" in the example).</param>
/// <param name="stdCondition">The condition the result resources must conform to.</param>
void RegisterSingleTokenRestriction ( string anchor, string token, IResource stdCondition );
/// <summary>
/// Register phrase "anchor text" which gives this text for parsing to the
/// IQueryTokenMatcher object. If IQueryTokenMatcher manages to parse the
/// "text" (that is to extract proper parameters to some ConditionTemplate)
/// then it produces an instance of that ConditionTemplate as the
/// instantiated Condition (proxy condition).
/// Example: RegisterFreestyleRestriction( "from", fromMatcher )
/// ".. from Greg" - restrict search result to those which came
/// from a person with "Greg" as a first or last name.
/// </summary>
/// <param name="anchor">The anchor word that starts the phrase ("from" in the example).</param>
/// <param name="matcher">The matcher that parses the text following the anchor.</param>
void RegisterFreestyleRestriction ( string anchor, IQueryTokenMatcher matcher );
/// <summary>
/// Get a resource type name registered for given anchor and a token
/// from the query.
/// </summary>
/// <param name="anchor">The anchor word.</param>
/// <param name="token">The token from the query.</param>
/// <returns>Resource type name if such is registered for given anchor and token,
/// null if no such combination is registered.</returns>
string GetResourceTypeRestriction( string anchor, string token );
/// <summary>
/// Get a condition resource registered for given anchor and a token
/// from the query.
/// </summary>
/// <param name="anchor">The anchor word.</param>
/// <param name="token">The token from the query.</param>
/// <returns>A condition if such is registered for given anchor and token,
/// null if no such combination is registered.</returns>
IResource GetSingleTokenRestriction( string anchor, string token );
/// <summary>
/// Get a generated condition resource (proxy condition as a result of
/// condition template instantiation) registered for given anchor and
/// a parseable text from the query.
/// </summary>
/// <param name="anchor">The anchor word.</param>
/// <param name="text">The text from the query that follows the anchor.</param>
/// <returns>A condition if such is registered for given anchor and
/// text is parsable into template parameters, null if no such combination
/// is found.</returns>
IResource GetMatchingFreestyleRestriction( string anchor, string text );
/// <summary>
/// Retrieve all registered anchors.
/// </summary>
/// <returns>An array of the registered anchors.</returns>
string[] GetAllAnchors();
}
/// <summary>
/// Interface for handlers of parts of the search query string starting after the
/// registered anchor. Handler is responsible for matching of the text with possible
/// parameters of a handler-defined condition template.
/// </summary>
/// <remarks>
/// A matcher is registered for an anchor through
/// <see cref="ISearchQueryExtensions.RegisterFreestyleRestriction"/>.
/// </remarks>
/// <since>2.2</since>
public interface IQueryTokenMatcher
{
/// <summary>
/// Parse token stream, produce parameters for a [particular] condition template,
/// and instantiate this template for producing a proxy condition which then
/// will be used to restrict the search result set.
/// </summary>
/// <param name="tokens">The part of the query text to be parsed.</param>
/// <returns>The proxy condition produced from the template.
/// NOTE(review): the result for unparsable text is not visible here - presumably null; confirm.</returns>
IResource ParseTokenStream( string tokens );
}
/// <summary>
/// A search provider that can be registered with an <see cref="ITextIndexManager"/>
/// (see <see cref="ITextIndexManager.RegisterSearchProvider(ISearchProvider, string)"/>).
/// </summary>
/// <remarks>
/// NOTE(review): this interface was undocumented; the member descriptions below are
/// derived from the signatures only and should be confirmed against an implementation.
/// </remarks>
public interface ISearchProvider
{
/// <summary>
/// Gets the title of the provider.
/// </summary>
string Title { get; }
/// <summary>
/// Gets the icon of the provider.
/// </summary>
Icon Icon { get; }
/// <summary>
/// Processes the given query. The method returns nothing; how the results are
/// delivered is not visible from this interface.
/// </summary>
/// <param name="query">The query string.</param>
void ProcessQuery( string query );
}
}