///
/// Copyright © 2003-2008 JetBrains s.r.o.
/// You may distribute under the terms of the GNU General Public License, as published by the Free Software Foundation, version 2 (see License.txt in the repository root folder).
///
using System;
using System.Diagnostics;
using System.Drawing;

//-----------------------------------------------------------------------------
//  Text processing API block
//-----------------------------------------------------------------------------

namespace JetBrains.Omea.OpenAPI
{
    /// <summary>
    /// Enumerates the proximity levels <c>Phrase</c>, <c>Sentence</c> and <c>Document</c>.
    /// </summary>
    /// <remarks>
    /// NOTE(review): presumably the scope within which query terms must co-occur;
    /// nothing in this file consumes the enum, so confirm against the query processor.
    /// </remarks>
    /// <since>2.3</since>
    public enum EntryProximity
    {
        Phrase = 1,
        Sentence = 2,
        Document = 3
    }

    /// <summary>
    /// DocumentSectionResource describes names of resource types and properties,
    /// necessary for referencing of their resources in other components (like
    /// AdvancedSearchForm).
    /// </summary>
    /// <since>2.1.3</since>
    public class DocumentSectionResource
    {
        public const string DocSectionResName = "DocumentSection";
        public const string SectionHelpDescription = "SectionHelpDescription";
    }

    /// <summary>
    /// DocumentSection describes possible standard sections of a document. Query
    /// processing facilities can later restrict the search to any of these
    /// particular sections.
    /// </summary>
    public class DocumentSection
    {
        /// <summary>
        /// Corresponds to the whole body of the document, search will not be restricted at all.
        /// </summary>
        public const string BodySection = "All Sections";

        /// <summary>
        /// Corresponds to the title/subject of the email, heading of the news article.
        /// </summary>
        public const string SubjectSection = "Subject/Heading";

        /// <summary>
        /// Corresponds to the annotation of the resource.
        /// </summary>
        public const string AnnotationSection = "Annotation";

        /// <summary>
        /// Corresponds to the textual representation of authors of the email/article, rss feed.
        /// </summary>
        public const string SourceSection = "Source/From";

        /// <summary>
        /// Corresponds to the whole body of the document minus Subject/Title. This auxiliary
        /// section is used neither in text indexing nor in UI for restricting the search. Its
        /// auxiliary purpose is to exclude subject/heading offsets from the given set.
        /// </summary>
        public const string NonSubjectSections = "NonSubject";

        /// <summary>
        /// Restricts the search result (list of search matches) to a particular section.
        /// </summary>
        /// <param name="allResults">Search results to be restricted. May be null.</param>
        /// <param name="desiredSection">Section to which the results should be restricted.</param>
        /// <returns>
        /// The restricted list of the search results. <c>null</c> is returned both when
        /// <paramref name="allResults"/> is null and when no entry belongs to the section.
        /// </returns>
        /// <since>2.0</since>
        public static WordPtr[] RestrictResults( WordPtr[] allResults, string desiredSection )
        {
            if( allResults == null )
            {
                return null;
            }

            // First pass: count the matches so that the result array can be sized exactly.
            int fit = 0;
            foreach( WordPtr word in allResults )
            {
                if( IsProperSection( word, desiredSection ) )
                {
                    fit++;
                }
            }

            // Callers receive null (not an empty array) when nothing matches.
            if( fit == 0 )
            {
                return null;
            }

            // Second pass: copy the matches, preserving their original order.
            WordPtr[] result = new WordPtr[ fit ];
            int index = 0;
            foreach( WordPtr word in allResults )
            {
                if( IsProperSection( word, desiredSection ) )
                {
                    Debug.Assert( index < fit );
                    result[ index++ ] = word;
                }
            }
            return result;
        }

        /// <summary>
        /// Tells whether a search entry belongs to the requested section. The
        /// <see cref="NonSubjectSections"/> pseudo-section accepts every entry whose
        /// section is not <see cref="SubjectSection"/>.
        /// </summary>
        private static bool IsProperSection( WordPtr word, string section )
        {
            return word.Section == section ||
                   ( section == NonSubjectSections && word.Section != SubjectSection );
        }
    }

    /// <summary>
    /// Specifies possible purposes of requesting the text of a resource.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the original comment referenced the requesting API here, but the
    /// reference was lost; the current purpose is exposed to text providers through
    /// <see cref="IResourceTextConsumer.Purpose"/>.
    /// </remarks>
    public enum TextRequestPurpose
    {
        /// <summary>
        /// The text is requested for indexing. The complete text of the document should be returned.
        /// </summary>
        Indexing,

        /// <summary>
        /// The text is requested for showing the context of a search result. If extracting the text
        /// takes a long time, the extraction should not be performed, and
        /// <see cref="IResourceTextConsumer.RejectResult"/> should be called to reject the results
        /// from other text providers.
        /// </summary>
        ContextExtraction
    }

    #region Struct WordPtr — A structure that represents an individual search results entry.

    /// <summary>
    /// A structure that represents an individual search results entry.
    /// </summary>
    public struct WordPtr
    {
        /// <summary>
        /// Offset of this word from the beginning of the plain-text document representation
        /// that was supplied for processing (presumably to an
        /// <see cref="IResourceTextConsumer"/> — the original reference was lost, confirm).
        /// </summary>
        public int StartOffset;

        /// <summary>
        /// Document section in which this search result resides, for example, title, body, etc.
        /// </summary>
        public string Section;

        /// <summary>
        /// Document section Id in which this search result resides.
        /// </summary>
        public int SectionId;

        /// <summary>
        /// The search result as it occurs in the plain-text document representation.
        /// </summary>
        public string Text;

        /// <summary>
        /// Original wordform that was present in the query. Different textual
        /// representations of the same query token have this attribute equal.
        /// </summary>
        public string Original;

        /// <summary>
        /// A shared zero-length array of search entries.
        /// </summary>
        public static WordPtr[] Empty = new WordPtr[ 0 ];

        #region Error Checks

        /// <summary>
        /// Performs a runtime check on the WordPtr contents to ensure that the structure is valid.
        /// If not, throws an exception that explains what is wrong.
        /// </summary>
        /// <exception cref="WordPtrException">
        /// <see cref="StartOffset"/> is negative or equal to <see cref="int.MaxValue"/>, or any of
        /// <see cref="Section"/>, <see cref="Text"/>, <see cref="Original"/> is null or empty.
        /// </exception>
        /// <since>2.0</since>
        public void AssertValid()
        {
            if( (StartOffset < 0) || (StartOffset == int.MaxValue) )
            {
                throw new WordPtrException( "The StartOffset field of a WordPtr must be a non-negative finite value." );
            }
            if( (Section == null) || (Section.Length <= 0) )
            {
                throw new WordPtrException( "The Section field of a WordPtr must be defined. See DocumentSection structure for the available values." );
            }
            // TODO: check the SectionID
            if( (Text == null) || (Text.Length <= 0) )
            {
                throw new WordPtrException( "The Text field of a WordPtr must be defined." );
            }
            if( (Original == null) || (Original.Length <= 0) )
            {
                throw new WordPtrException( "The Original field of a WordPtr must be defined." );
            }
        }

        /// <summary>
        /// Checks whether an array of WordPtrs is valid.
        /// A null value is assumed to be valid by default.
        /// See the <see cref="AssertValid()"/> function for details.
        /// </summary>
        /// <param name="words">An array of words to be checked for validity.</param>
        /// <param name="inOneSection">
        /// If True, then all the words in the array must belong to the same document section.
        /// If False, no cross-word checks for the <see cref="Section"/> value are performed.
        /// </param>
        /// <exception cref="WordPtrException">
        /// An individual entry is invalid, or <paramref name="inOneSection"/> is true and the
        /// entries belong to different sections. Per-entry failures are reported first, because
        /// the section mismatch is only raised after every entry has been checked.
        /// </exception>
        /// <since>2.0</since>
        public static void AssertValid( WordPtr[] words, bool inOneSection )
        {
            if( words == null )
            {
                return; // A valid case
            }

            // Check the individual WordPtrs, and also collect the section information.
            string previousSection = null;
            bool sectionDiffers = false;
            foreach( WordPtr word in words )
            {
                word.AssertValid();

                // Sections are compared only when the caller asked for it. Once a
                // mismatch between neighbouring entries is seen the flag is never lowered.
                if( inOneSection )
                {
                    if( (previousSection != null) && (previousSection != word.Section) )
                    {
                        sectionDiffers = true;
                    }
                    previousSection = word.Section;
                }
            }

            // Issue the section error, if needed.
            if( sectionDiffers )
            {
                throw new WordPtrException( "All the WordPtrs in the array must belong to the same section." );
            }
        }

        /// <summary>
        /// An exception that is thrown by the <see cref="WordPtr"/> validity checks.
        /// </summary>
        /// <since>2.0</since>
        public class WordPtrException : Exception
        {
            /// <summary>
            /// Creates the exception with the given explanatory text.
            /// </summary>
            /// <param name="errortext">Message describing which check has failed.</param>
            public WordPtrException( string errortext )
                : base( errortext )
            {
            }
        }

        #endregion
    }

    #endregion

    /// <summary>
    /// Interface describes the core text-indexing engine which consumes the text
    /// fragments, tokenizes them and constructs index chunks.
    /// </summary>
    /// <remarks>
    /// Fragments for a single document must be submitted consecutively,
    /// otherwise engine will decide that a new version of a document is queued for
    /// indexing.
    /// </remarks>
    public interface IResourceTextConsumer
    {
        /// <summary>
        /// Submit a header/subject/title fragment of a resource.
        /// </summary>
        /// <param name="resourceId">A resource Id from which the fragment is taken.</param>
        /// <param name="text">Fragment text.</param>
        void AddDocumentHeading( int resourceId, string text );

        /// <summary>
        /// Submit a fragment of a resource.
        /// </summary>
        /// <param name="resourceId">A resource Id from which the fragment is taken.</param>
        /// <param name="text">Fragment text.</param>
        void AddDocumentFragment( int resourceId, string text );

        /// <summary>
        /// Submit a fragment of a resource from a particular named section.
        /// </summary>
        /// <param name="resourceId">A resource Id from which the fragment is taken.</param>
        /// <param name="text">Fragment text.</param>
        /// <param name="sectionName">Name of a section from which the fragment is taken.</param>
        void AddDocumentFragment( int resourceId, string text, string sectionName );

        /// <summary>
        /// Method specifies the amount to be added to the starting offset
        /// of the next fragment.
        /// </summary>
        /// <param name="spacesAmount">
        /// Number by which the starting offset must be increased.
        /// Must be positive.
        /// </param>
        /// <since>2.0</since>
        void IncrementOffset( int spacesAmount );

        /// <summary>
        /// Start counting the token offset from 0 for all subsequent fragments of
        /// the same document.
        /// </summary>
        void RestartOffsetCounting();

        /// <summary>
        /// Do not account the fragments collected so far for the currently processed
        /// document if not all fragments can be submitted for some particular reason.
        /// </summary>
        void RejectResult();

        /// <summary>
        /// Get the purpose of the current IResourceTextConsumer instance -
        /// index construction or context construction.
        /// </summary>
        TextRequestPurpose Purpose { get; }
    }

    /// <summary>
    /// Describes the offset of a highlighted section in a search result context.
    /// </summary>
    /// <since>2.0</since>
    public class OffsetData
    {
        /// <summary>
        /// Creates an offset data with the specified start and length.
        /// </summary>
        /// <param name="start">The start of a highlighted section.</param>
        /// <param name="len">The length of a highlighted section.</param>
        public OffsetData( int start, int len )
        {
            Start = start;
            Length = len;
        }

        /// <summary>
        /// The start of a highlighted section.
        /// </summary>
        public int Start;

        /// <summary>
        /// The length of a highlighted section.
        /// </summary>
        public int Length;
    }

    /// <summary>
    /// Allows to receive search highlight and context data for specific resources.
    /// </summary>
    public interface IHighlightDataProvider
    {
        /// <summary>
        /// Returns the search result highlighting data for the specified resource.
        /// </summary>
        /// <param name="res">The resource for which the highlighting data is retrieved.</param>
        /// <param name="words">The returned array of search result records.</param>
        /// <returns>true if highlighting data for the specified resource was found, false otherwise.</returns>
        /// <remarks>
        /// An implementation of this interface for a specific search is handed out through the
        /// <c>highlightDataProvider</c> out parameter of the five-argument
        /// <c>ITextIndexManager.ProcessQuery</c> overload.
        /// </remarks>
        bool GetHighlightData( IResource res, out WordPtr[] words );

        /// <summary>
        /// Requests asynchronous context retrieval for the specified list of resource IDs.
        /// </summary>
        /// <param name="resourceIDs">The list of resource IDs for which contexts are requested.</param>
        /// <remarks>
        /// The contexts are stored in a virtual property "Context" which is managed
        /// by the property provider attached to the search
        /// result resource list.
        /// </remarks>
        void RequestContexts( int[] resourceIDs );

        /// <summary>
        /// Retrieves the context for the specified resource.
        /// </summary>
        /// <param name="res">The resource for which the context is retrieved.</param>
        /// <returns>The context, or null if the context is not available.</returns>
        /// <since>2.0</since>
        string GetContext( IResource res );

        /// <summary>
        /// Return an array of highlighted tokens in the context string in the format:
        /// { offset in context, highlight length }.
        /// </summary>
        /// <param name="res">The resource for which the information is retrieved.</param>
        /// <returns>Array of highlight data pairs.</returns>
        /// <since>2.0</since>
        OffsetData[] GetContextHighlightData( IResource res );
    }

    /// <summary>
    /// Describes the array of documents which are available for searching.
    /// </summary>
    public class DocsArrayArgs : EventArgs
    {
        /// <summary>
        /// Creates the event arguments from a list of document IDs. The list is copied,
        /// so later changes to <paramref name="docs"/> do not affect this instance;
        /// a null list is stored as an empty array.
        /// </summary>
        /// <param name="docs">IDs of the documents, may be null.</param>
        public DocsArrayArgs( int[] docs )
        {
            if( docs == null )
            {
                _docsArray = new int[ 0 ];
            }
            else
            {
                _docsArray = new int[ docs.Length ];
                Array.Copy( docs, _docsArray, docs.Length );
            }
        }

        /// <summary>
        /// Returns the array of IDs of resources which are available for searching.
        /// </summary>
        /// <returns>
        /// The array of document IDs. This is the instance's internal array, not a copy,
        /// and it is never null.
        /// </returns>
        public int[] GetDocuments()
        {
            return _docsArray;
        }

        private readonly int[] _docsArray;
    }

    /// <summary>
    /// Callback defines an event when a number of documents becomes available
    /// for searching.
    /// </summary>
    public delegate void UpdateFinishedEventHandler( object sender, DocsArrayArgs docIds );

    /// <summary>
    /// Interface controls the submission of the documents to the text-index
    /// processing - manage the queue of text-indexing jobs, handle the events
    /// on different text index states, handle exceptional situations in the
    /// text index structure (text index corruption).
    /// </summary>
    public interface ITextIndexManager
    {
        /// <summary>
        /// Queue a resource for text-indexing.
        /// </summary>
        /// <param name="resourceId">Id of a resource.</param>
        void QueryIndexing( int resourceId );

        /// <summary>
        /// Queue a deletion of a resource from the text index.
        /// </summary>
        /// <param name="resourceId">Id of a resource.</param>
        void DeleteDocumentQueued( int resourceId );

        /// <summary>
        /// Delete current text index, build a new one from scratch. Usually this
        /// method is used when some exceptional situation is met which causes
        /// text index corruption.
        /// </summary>
        void RebuildIndex();

        /// <summary>
        /// Determines whether text index files are present.
        /// </summary>
        /// <returns>True if valid text index is present.</returns>
        bool IsIndexPresent();

        /// <summary>
        /// Determines whether a particular document is indexed.
        /// </summary>
        /// <param name="resourceId">Id of a resource.</param>
        /// <returns>True if the document text was indexed.</returns>
        bool IsDocumentInIndex( int resourceId );

        /// <summary>
        /// Registers a callback which is called when new batch of documents has
        /// been indexed and is available for searching.
        /// </summary>
        /// <param name="callback">The callback to register.</param>
        void SetUpdateResultHandler( UpdateFinishedEventHandler callback );

        /// <summary>
        /// Return a list of resources, textual representation of which matches
        /// the query.
        /// </summary>
        /// <param name="searchQuery">Query string.</param>
        /// <returns>List of resources matching the query.</returns>
        IResourceList ProcessQuery( string searchQuery );

        /// <summary>
        /// Return a list of resources, textual representation of which matches
        /// the query. Additionally specify whether to start the process of
        /// contexts extraction (extraction is done asynchronously).
        /// </summary>
        /// <param name="searchQuery">Query string.</param>
        /// <param name="restrictByIds">List of resource Ids within which the search is to be performed.</param>
        /// <param name="highlightDataProvider">Auxiliary structure to be passed
        /// to the ResourceBrowser.</param>
        /// <param name="stopList">List of stopwords found during parsing the query.</param>
        /// <param name="parsingErrorMsg">Contains message describing the error occurred during parsing the query.</param>
        /// <returns>List of resources matching the query.</returns>
        /// <since>2.0</since>
        IResourceList ProcessQuery( string searchQuery, int[] restrictByIds,
                                    out IHighlightDataProvider highlightDataProvider,
                                    out string[] stopList, out string parsingErrorMsg );

        /// <summary>
        /// Match a resource over the query using the TextQueriesOptimizationManager
        /// class which interacts with FilterRegistry and FullTextIndexer on per-document
        /// basis when they are to appear in the text index.
        /// </summary>
        /// <since>2.3 (2.5) (Grenache)</since>
        bool MatchQuery( string query, IResource res );

        /// <summary>
        /// Get or set whether text indexing operations are carried out during
        /// periods when the computer is in the idle mode.
        /// </summary>
        bool IdleIndexingMode { get; set; }

        /// <summary>
        /// Event is fired when text index construction is complete, that is when
        /// there has been constructed at least one chunk of index over which the
        /// search is possible.
        /// </summary>
        event EventHandler IndexLoaded;

        //---------------------------------------------------------------------
        //  Search providers management
        //---------------------------------------------------------------------
        // NOTE(review): the members below carry no documentation in the original
        // source; their semantics can only be presumed from their names and must be
        // confirmed against the implementing class.
        void RegisterSearchProvider( ISearchProvider host, string title );
        void RegisterSearchProvider( ISearchProvider host, string title, string groupName );
        void UnregisterSearchProvider( ISearchProvider host );
        string GetSearchProviderTitle( ISearchProvider host );
        ISearchProvider CurrentSearchProvider { get; set; }
        ISearchProvider[] GetSearchProviders();
        string[] GetSearchProviderGroups();
        ISearchProvider[] GetSearchProvidersInGroup( string group );
    }

    /// <summary>
    /// Allows to register specially formatted phrases, which being added to the
    /// end of the search query allow (after parsing) to restrict the search
    /// result without explicit usage of Advanced Search capabilities.
    /// </summary>
    /// <since>2.2</since>
    public interface ISearchQueryExtensions
    {
        /// <summary>
        /// Register phrase "anchor displayType" which (after parsing) restricts
        /// the search result to resources of "resType" type.
        /// Example: RegisterResourceTypeRestriction( "in", "news", "Article" )
        ///          "... in news" - restricts search result to news articles.
        /// </summary>
        void RegisterResourceTypeRestriction( string anchor, string displayType, string resType );

        /// <summary>
        /// Register phrase "anchor token" which (after parsing) restricts
        /// the search result to resources conforming to "stdCondition" condition.
        /// Example: RegisterSingleTokenRestriction( "in", "unread", conditionResource )
        ///          "... in unread" - restricts search result to those which are
        ///          not read yet.
        /// </summary>
        void RegisterSingleTokenRestriction( string anchor, string token, IResource stdCondition );

        /// <summary>
        /// Register phrase "anchor text" which gives this text for parsing to the
        /// IQueryTokenMatcher object. If IQueryTokenMatcher manages to parse the
        /// "text" (that is to extract proper parameters to some ConditionTemplate)
        /// then it produces an instance of that ConditionTemplate as the
        /// instantiated Condition (proxy condition).
        /// Example: RegisterFreestyleRestriction( "from", fromMatcher )
        ///          ".. from Greg" - restrict search result to those which came
        ///          from a person with "Greg" as a first or last name.
        /// </summary>
        void RegisterFreestyleRestriction( string anchor, IQueryTokenMatcher matcher );

        /// <summary>
        /// Get a resource type name registered for given anchor and a token
        /// from the query.
        /// </summary>
        /// <returns>Resource type name if such is registered for given anchor and token,
        /// NULL if no such combination is registered.</returns>
        string GetResourceTypeRestriction( string anchor, string token );

        /// <summary>
        /// Get a condition resource registered for given anchor and a token
        /// from the query.
        /// </summary>
        /// <returns>A condition if such is registered for given anchor and token,
        /// NULL if no such combination is registered.</returns>
        IResource GetSingleTokenRestriction( string anchor, string token );

        /// <summary>
        /// Get a generated condition resource (proxy condition as a result of
        /// condition template instantiation) registered for given anchor and
        /// a parseable text from the query.
        /// </summary>
        /// <returns>A condition if such is registered for given anchor and
        /// text is parsable into template parameters, NULL if no such combination
        /// is found.</returns>
        IResource GetMatchingFreestyleRestriction( string anchor, string text );

        /// <summary>
        /// Retrieve all registered anchors.
        /// </summary>
        string[] GetAllAnchors();
    }

    /// <summary>
    /// Interface for handlers of parts of the search query string starting after the
    /// registered anchor. Handler is responsible for matching of the text with possible
    /// parameters of a handler-defined condition template.
    /// </summary>
    /// <since>2.2</since>
    public interface IQueryTokenMatcher
    {
        /// <summary>
        /// Parse token stream, produce parameters for a [particular] condition template,
        /// and instantiate this template for producing a proxy condition which then
        /// will be used to restrict the search result set.
        /// </summary>
        IResource ParseTokenStream( string tokens );
    }

    /// <summary>
    /// A search provider that can be registered with <see cref="ITextIndexManager"/>
    /// through its <c>RegisterSearchProvider</c> methods.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the original source documents none of these members; the
    /// descriptions below restate the declarations only.
    /// </remarks>
    public interface ISearchProvider
    {
        /// <summary>Title of the provider.</summary>
        string Title { get; }

        /// <summary>Icon of the provider.</summary>
        Icon Icon { get; }

        /// <summary>Processes the given query string; no result is returned to the caller.</summary>
        void ProcessQuery( string query );
    }
}