| |
| Navigation |
 |
| |










|
|
| Sample IFilter |
 |
Startseite \ Writing IFilters in .NET \ Sample IFilter \
IFilter for TXT files based on the IFilter Template
This sample shows how easy it could and should(!) be to write IFilters.
The source code is rearranged to show the essential parts:
// Step 1: decorate the class with its attributes and derive it from the base class.
[SearchExtensions(".test")]                       // IFilter for the file type ".test"
[Guid("E515F213-DECC-4508-BFE9-4CEB4518CEFA")]    // CLSID for this class
[ComVisible(true)]
public sealed class DummyIFilter : BaseFilter ....

    // Step 2: override this method; it is the start of the content reading process.
    protected override void InitializeChunks()
    {
        // Normally the document type is provided here; search engines like it.
        ValueChunk docTypeChunk = new ValueChunk();
        docTypeChunk.Value = "document/text";
        docTypeChunk.PropSet = FMTID_UserDefinedProperties;
        docTypeChunk.Kind = PROPSPECKIND.PRSPEC_LPWSTR;
        docTypeChunk.ID = _perceivedTypeStr;
        AddChunk(docTypeChunk);

        // Many more value chunks can be emitted here,
        // and even some or all of the TextChunks.
    }

    // Step 3: override this method; it returns the content of the file block by block.
    protected override void CreateChunks()
    {
        // Size of the read buffer, in bytes.
        const int BufferSize = 8192;

        // Read the next block from the file.
        byte[] buffer = new byte[BufferSize];
        int bytesRead = InputStream.Read(buffer, 0, BufferSize);

        // Nothing was read: we reached the end.
        if (bytesRead == 0)
        {
            return;
        }

        // Parse/convert the content that was read.
        // Your work starts here: you have to know how to deal with your file format!
        // For an ASCII file the bytes are simply decoded.
        string decoded = System.Text.Encoding.ASCII.GetString(buffer, 0, bytesRead);

        // Create a new text chunk; it's up to you whether you emit more than
        // one chunk and how you split the content into chunks.
        TextChunk textChunk = new TextChunk();
        textChunk.Value = decoded;

        // Often a new chunk starts a new topic or item, or begins just because
        // the buffer is full, as in our case.
        textChunk.BreakType = CHUNK_BREAKTYPE.CHUNK_NO_BREAK;

        // To help search engines you should evaluate the language of the chunk,
        // or just fill it with some standard value.
        textChunk.Language = System.Globalization.CultureInfo.CurrentCulture;

        // Fill out the other fields with the standard values.
        textChunk.NextPos = 0;
        textChunk.PropSet = new Guid("B725F130-47EF-101A-A5F1-02608C9EEBAC"); // Contents GUID
        textChunk.Kind = PROPSPECKIND.PRSPEC_PROPID;
        textChunk.ID = new IntPtr(0x13); // Contents ID
        AddChunk(textChunk);
    }
}
That's all! |
| |
|