Query Sitecore Lucene Index Directly

Sitecore has many different ways to utilize the search index, but all of them sit behind some wrapper class. Sitecore officially recommends that you avoid utilizing the search index without using Sitecore as a proxy; however, this does come at a performance hit. Most of us live in a world where optimizations are welcome things: shaving a few milliseconds of compute time per page load can have a huge impact on the robustness of a website. I propose that we utilize the search index without any proxy.

Populate your index with meaningful data

I’m not going to go into this in too much detail, as others have already covered this sufficiently. Kamsar has built a powerful system here to index content in a component-driven world.  I highly suggest it as a read.

Configure your index

By default all indexes are set up to be a single index that completely goes down during a re-build, which is clearly not acceptable for any production application.  Sitecore has a solution for this in their SwitchOnRebuildLuceneIndex.  Essentially it works by swapping to a standby index whenever a full build is performed.  However, this is clearly one of the reasons that Sitecore would prefer that you use their wrappers to query the index: out of the box there is no way to identify which index is currently active.  I have come up with a solution to this problem by exposing a flag that reports which index is active.  This code is essentially the exact same thing as Sitecore’s SwitchOnRebuildLuceneIndex, except with the addition of the IsPrimary method.

	/// <summary>
	/// A Lucene index that keeps two directory sets per shard (primary and secondary) and swaps
	/// between them after every full rebuild. Unlike the stock switch-on-rebuild index, it exposes
	/// which set is currently active through <see cref="IsPrimary"/>.
	/// </summary>
	public class CustomSwitchOnRebuild : LuceneIndex
	{
		private readonly IShardFactory switchOnRebuildShardFactory = (IShardFactory)new LuceneSwitchOnRebuildShardFactory();
		private readonly object fullRebuildLockObject = new object();

		// The directory set that is persisted as the read/update directory (see SwitchDirectories);
		// the opposite set is persisted as the full-rebuild directory.
		private SwitchOnRebuildMode mode;

		/// <summary>
		/// Gets the shard factory that produces switch-on-rebuild shards.
		/// </summary>
		public override IShardFactory ShardFactory
		{
			get
			{
				return this.switchOnRebuildShardFactory;
			}
		}

		/// <summary>
		/// Reports whether the primary directory set is the one currently used for reads and updates.
		/// </summary>
		/// <returns>
		/// <c>true</c> when the index is in <see cref="SwitchOnRebuildMode.Primary"/> mode;
		/// <c>false</c> when it is in secondary mode.
		/// </returns>
		public bool IsPrimary()
		{
			// Previously compared with "!=", which made the method answer the opposite of its name.
			return this.mode == SwitchOnRebuildMode.Primary;
		}

		/// <summary>
		/// Creates the index with an explicit folder and property store.
		/// </summary>
		/// <param name="name">Index name.</param>
		/// <param name="folder">Index folder, passed through to the base class.</param>
		/// <param name="propertyStore">Store used to persist which directory set is active.</param>
		public CustomSwitchOnRebuild(string name, string folder, IIndexPropertyStore propertyStore)
		  : base(name, folder, propertyStore)
		{
		}

		/// <summary>
		/// Creates the index with only a name; remaining settings are left to the base class.
		/// </summary>
		/// <param name="name">Index name.</param>
		protected CustomSwitchOnRebuild(string name)
		  : base(name)
		{
		}

		/// <summary>
		/// Resets the index. A full-rebuild context is reset and has its writers re-initialized;
		/// any other context is delegated to the base implementation.
		/// </summary>
		/// <param name="context">The update context being reset.</param>
		protected override void DoReset(IProviderUpdateContext context)
		{
			// NOTE(review): CreateFullRebuildContext below returns LuceneCustomFullRebuildContext, which is
			// declared as deriving from LuceneUpdateContext. If that type is not a LuceneFullRebuildContext,
			// this cast yields null for our own contexts and the base reset runs instead - confirm intent.
			LuceneFullRebuildContext fullRebuildContext = context as LuceneFullRebuildContext;
			if (fullRebuildContext != null)
			{
				fullRebuildContext.Reset();
				fullRebuildContext.ReinitializeWriters();
			}
			else
			{
				base.DoReset(context);
			}
		}

		/// <summary>
		/// Runs the base rebuild and then swaps the active directory set.
		/// </summary>
		/// <param name="context">The update context used for the rebuild.</param>
		/// <param name="indexingOptions">Indexing options passed through to the base class.</param>
		/// <param name="cancellationToken">Token passed through to the base class.</param>
		protected override void DoRebuild(IProviderUpdateContext context, IndexingOptions indexingOptions, CancellationToken cancellationToken)
		{
			base.DoRebuild(context, indexingOptions, cancellationToken);
			this.SwitchDirectories();
		}

		/// <summary>
		/// Initializes the shards and decides which directory set is active: first from the value
		/// persisted in the property store, otherwise from whichever set was modified most recently.
		/// </summary>
		protected override void InitializeShards()
		{
			CrawlingLog.Log.Debug(string.Format("[Index={0}] Creating primary and secondary directories", (object)this.Name), (Exception)null);
			base.InitializeShards();
			string storedMode = this.PropertyStore.Get(IndexProperties.ReadUpdateDirectory);
			bool modeResolved = false;
			if (!string.IsNullOrEmpty(storedMode))
			{
				CrawlingLog.Log.Debug(string.Format("[Index={0}] Resolving directories from index property store for index '{0}'", (object)this.Name), (Exception)null);
				if (Enum.TryParse<SwitchOnRebuildMode>(storedMode, out this.mode))
					modeResolved = true;
			}
			if (!modeResolved)
			{
				CrawlingLog.Log.Debug(string.Format("[Index={0}] Resolving directories by last time modified.", (object)this.Name), (Exception)null);
				long newestPrimary = long.MinValue;
				long newestSecondary = long.MinValue;
				foreach (LuceneSwitchOnRebuildShard switchOnRebuildShard in Enumerable.Cast<LuceneSwitchOnRebuildShard>((IEnumerable)this.Shards))
				{
					try
					{
						long primaryModified = IndexReader.LastModified(switchOnRebuildShard.PrimaryDirectory);
						long secondaryModified = IndexReader.LastModified(switchOnRebuildShard.SecondaryDirectory);
						CrawlingLog.Log.Debug(string.Format("[Index={0}, Shard={1}] Primary directory last modified = '{2}'.", (object)this.Name, (object)switchOnRebuildShard, (object)primaryModified), (Exception)null);
						CrawlingLog.Log.Debug(string.Format("[Index={0}, Shard={1}] Secondary directory last modified = '{2}'.", (object)this.Name, (object)switchOnRebuildShard, (object)secondaryModified), (Exception)null);
						if (primaryModified > newestPrimary)
							newestPrimary = primaryModified;
						if (secondaryModified > newestSecondary)
							newestSecondary = secondaryModified;
					}
					catch (FileNotFoundException ex)
					{
						// Only a "no segments" failure is tolerated; both timestamps are then zeroed.
						// Ordinal comparison keeps the check independent of the current culture.
						if (!ex.Message.StartsWith("no segments", StringComparison.Ordinal))
							throw;
						else
							newestPrimary = newestSecondary = 0L;
					}
				}
				// Ties go to the primary set.
				this.mode = newestPrimary >= newestSecondary ? SwitchOnRebuildMode.Primary : SwitchOnRebuildMode.Secondary;
			}
			this.SwitchDirectories(this.mode);
		}

		/// <summary>
		/// Swaps to whichever directory set is not currently active.
		/// </summary>
		protected void SwitchDirectories()
		{
			this.SwitchDirectories(this.mode == SwitchOnRebuildMode.Primary ? SwitchOnRebuildMode.Secondary : SwitchOnRebuildMode.Primary);
		}

		/// <summary>
		/// Puts every shard into <paramref name="newMode"/>, records it as the active mode and
		/// persists both the read/update and the full-rebuild directory choices.
		/// </summary>
		/// <param name="newMode">The directory set that should become active.</param>
		private void SwitchDirectories(SwitchOnRebuildMode newMode)
		{
			lock (this)
			{
				foreach (LuceneSwitchOnRebuildShard shard in this.Shards)
				{
					shard.SwitchDirectories(newMode);
					// Report the mode that was requested (newMode); this.mode still holds the old value here.
					Assert.IsTrue(shard.Mode == newMode, "[Index={0}, Shard={1}] SwitchOnRebuildShard not set in {2} mode. Shard mode: {3}", (object)this.Name, (object)shard, (object)newMode, (object)shard.Mode);
				}
				this.mode = newMode;
				this.PropertyStore.Set(IndexProperties.ReadUpdateDirectory, this.mode.ToString());
				// The full-rebuild directory is the opposite of the active one.
				// NOTE(review): relies on the enum's numeric values (presumably Primary = 0, Secondary = 1) - confirm.
				this.PropertyStore.Set(IndexProperties.FullRebuildDirectory, ((SwitchOnRebuildMode)(this.mode == SwitchOnRebuildMode.Primary ? 1 : 0)).ToString());
			}
		}

		/// <summary>
		/// Creates the update context used for a full rebuild, with its own clone of the commit policy executor.
		/// </summary>
		/// <returns>A new <see cref="LuceneCustomFullRebuildContext"/> bound to this index.</returns>
		protected override IProviderUpdateContext CreateFullRebuildContext()
		{
			this.EnsureInitialized();
			ICommitPolicyExecutor commitPolicyExecutor = (ICommitPolicyExecutor)this.CommitPolicyExecutor.Clone();
			commitPolicyExecutor.Initialize((ISearchIndex)this);
			return (IProviderUpdateContext)new LuceneCustomFullRebuildContext(this, commitPolicyExecutor);
		}

		/// <summary>
		/// Gets the object that full rebuilds synchronize on.
		/// </summary>
		/// <returns>A lock object private to this index instance.</returns>
		protected override object GetFullRebuildLockObject()
		{
			return this.fullRebuildLockObject;
		}
	}
	/// <summary>
	/// Update context created by <see cref="CustomSwitchOnRebuild"/> for full rebuilds. It adds a
	/// public way to re-run the writer initialization inherited from <see cref="LuceneUpdateContext"/>.
	/// </summary>
	public class LuceneCustomFullRebuildContext : LuceneUpdateContext
	{
		/// <summary>
		/// Creates a context bound to the given index.
		/// </summary>
		/// <param name="index">The index being rebuilt.</param>
		/// <param name="commitPolicyExecutor">Commit policy executor passed through to the base context.</param>
		public LuceneCustomFullRebuildContext(CustomSwitchOnRebuild index, ICommitPolicyExecutor commitPolicyExecutor) : base((ILuceneProviderIndex)index, commitPolicyExecutor)
		{
		}

		/// <summary>
		/// Calls the inherited writer initialization again.
		/// </summary>
		public void ReinitializeWriters()
		{
			InitializeWriters();
		}
	}

Accessing Lucene

First of all you’ll need Lucene.net which Sitecore ships with, so you can find it in the enormous list of dlls Sitecore comes with.

Sitecore’s Lucene search indexes are stored in Sitecore under the data directory. Here is a useful bit of code to identify the root index folder within Sitecore’s data directory.  It works by identifying if the data folder is defined as an absolute path or a relative path and uncovering the absolute path either way.

		// Absolute path of the "indexes" folder, always ending in a backslash.
		private readonly string indexDirectory;

		/// <summary>
		/// Works out the absolute path of the "indexes" folder beneath Sitecore's data folder.
		/// </summary>
		public MyClassesConstructor()
		{
			string dataFolder = Settings.DataFolder;

			// A data folder starting with a drive letter ("C:\") or with "//" is treated as absolute.
			bool isAbsolute = Regex.IsMatch(dataFolder, @"^(([a-zA-Z]:\\)|(//)).*");

			// Otherwise the first character of the setting is dropped and the rest is appended to the application path.
			string root = isAbsolute
				? dataFolder
				: HttpRuntime.AppDomainAppPath + dataFolder.Substring(1);

			this.indexDirectory = root + @"\indexes\";
		}

Next you’ll need a way of populating the FSDirectory that Lucene needs to identify the index.  This step is utilizing our IsPrimary method we added to the switching index above in the first code block.

		// Opened Lucene directories, keyed by database name (plus "_sec" for the secondary directory).
		private readonly Dictionary<string, FSDirectory> directories = new Dictionary<string, FSDirectory>();

		/// <summary>
		/// Gets the Lucene directory to query for the context item's index: the "_sec" directory when
		/// the index is a <see cref="CustomSwitchOnRebuild"/> whose IsPrimary() returns false, otherwise
		/// the directory named after the index itself.
		/// </summary>
		/// <exception cref="System.InvalidOperationException">No index could be resolved for the context item.</exception>
		private FSDirectory Directory
		{
			get
			{
				var indexable = new SitecoreIndexableItem(Context.Item);
				var index = ContentSearchManager.GetIndex(indexable);

				// Fail with a clear message before index.Name is dereferenced below.
				if (index == null)
				{
					throw new System.InvalidOperationException("No search index could be resolved for the context item.");
				}

				string databaseName = Context.Database.Name;

				// The primary directory is always opened and cached, exactly as before.
				FSDirectory primary = this.GetOrOpenDirectory(databaseName, this.indexDirectory + index.Name);

				// IsPrimary() is declared on CustomSwitchOnRebuild, so the index has to be cast first;
				// any other index type has no secondary directory to switch to.
				var switchingIndex = index as CustomSwitchOnRebuild;
				if (switchingIndex != null && !switchingIndex.IsPrimary())
				{
					return this.GetOrOpenDirectory(databaseName + "_sec", this.indexDirectory + index.Name + "_sec");
				}

				return primary;
			}
		}

		/// <summary>
		/// Returns the cached directory for <paramref name="key"/>, opening and caching it on first use.
		/// </summary>
		/// <param name="key">Cache key.</param>
		/// <param name="path">File-system path handed to FSDirectory.Open when the key is not cached yet.</param>
		/// <returns>The cached or newly opened directory.</returns>
		private FSDirectory GetOrOpenDirectory(string key, string path)
		{
			FSDirectory directory;
			if (!this.directories.TryGetValue(key, out directory))
			{
				directory = FSDirectory.Open(path);
				this.directories.Add(key, directory);
			}

			return directory;
		}

You’ll need a parser that knows which fields to search by default when a query term does not name a field explicitly.  You can add any fields your queries will find useful.  Additionally, with minor modifications the list of fields could be passed in together with the index query.

		// Shared query parser. The listed fields are the ones handed to MultiFieldQueryParser as its
		// field set; both the parser and its StandardAnalyzer are created for Version.LUCENE_30.
		// NOTE(review): Lucene documents QueryParser as not thread-safe, and this instance is static -
		// confirm that every caller serializes access to it.
		private static MultiFieldQueryParser parser = new MultiFieldQueryParser(
				Version.LUCENE_30,
				new[] { "_content", "_template", "_location", "_searchfacets" },
				new StandardAnalyzer(Version.LUCENE_30));

You’ll also need a way to parse the lucene syntax query.

		/// <summary>
		/// Parses a Lucene query-syntax string. When the text is not valid query syntax, it is
		/// escaped and parsed again so that it is treated as literal search text.
		/// </summary>
		/// <param name="searchQuery">The query text; leading and trailing whitespace is ignored.</param>
		/// <returns>The parsed query.</returns>
		/// <exception cref="System.ArgumentNullException"><paramref name="searchQuery"/> is null.</exception>
		private static Query ParseQuery(string searchQuery)
		{
			if (searchQuery == null)
			{
				throw new System.ArgumentNullException("searchQuery");
			}

			string trimmedQuery = searchQuery.Trim();

			// The parser is a single static instance and Lucene's QueryParser is documented as not
			// thread-safe, so parsing is serialized across concurrent callers.
			lock (parser)
			{
				try
				{
					return parser.Parse(trimmedQuery);
				}
				catch (ParseException)
				{
					// Invalid syntax: fall back to searching for the escaped text literally.
					return parser.Parse(QueryParser.Escape(trimmedQuery));
				}
			}
		}

Now we can query the index.  To do this we’ll pass in a standard Lucene query syntax expression and get items as results.  I found this site useful as a reference.

		/// <summary>
		/// Runs a Lucene query directly against the active index directory and lazily yields the
		/// matching items from the context database.
		/// </summary>
		/// <param name="luceneQuery">Query text in Lucene query syntax.</param>
		/// <returns>
		/// The items for the matching documents. Documents without a "_group" value, and documents
		/// whose item is not returned by the context database, are skipped, so the sequence never
		/// contains null.
		/// </returns>
		/// <remarks>
		/// The searcher is opened when enumeration starts and stays open until enumeration finishes
		/// or the enumerator is disposed.
		/// </remarks>
		public IEnumerable<Item> SearchForItems(string luceneQuery)
		{
			using (var searcher = new IndexSearcher(this.Directory, true))
			{
				// Resolve the database once so every hit is looked up in the same database.
				var database = Context.Database;
				var topDocs = searcher.Search(ParseQuery(luceneQuery), int.MaxValue);
				foreach (var hit in topDocs.ScoreDocs)
				{
					// "_group" is read as the item ID (it is passed straight to the ID constructor).
					string groupId = searcher.Doc(hit.Doc).Get("_group");
					if (string.IsNullOrEmpty(groupId))
					{
						continue;
					}

					// The index can reference an item the database no longer returns; skip instead of yielding null.
					Item item = database.GetItem(new ID(groupId));
					if (item != null)
					{
						yield return item;
					}
				}
			}
		}

Results

I’ve found that a significant speed increase can be attained through this method, easily returning complex search results in less than 30 milliseconds.  Lucene’s search index is truly an amazing piece of technology.

// Usage example: ask for the items matching the field-scoped query "_content:lucene".
// NOTE(review): SearchContext is presumably an instance of the class that declares SearchForItems - confirm.
var itemsThatReferenceLucene = SearchContext.SearchForItems("_content:lucene");
// Results are produced lazily while this loop runs.
foreach(Item luceneReference in itemsThatReferenceLucene)
{
...
}