elasticlunr

Index

constructor
elasticlunr.Index()

elasticlunr.Index is an object that manages a search index. It contains the indexes
and stores all the tokens and document lookups. It also provides the main
user-facing API for the library.

elasticlunr.Index = function () {
  var self = this;

  // Indexed field names and the document property used as the unique ref.
  self._fields = [];
  self._ref = 'id';

  // Collaborators: token pipeline, document store, per-field index map
  // and the event emitter.
  self.pipeline = new elasticlunr.Pipeline();
  self.documentStore = new elasticlunr.DocumentStore();
  self.index = {};
  self.eventEmitter = new elasticlunr.EventEmitter();
  self._idfCache = {};

  // Any change to the indexed documents throws away the cached idf values.
  function resetIdfCache() {
    self._idfCache = {};
  }

  self.on('add', 'remove', 'update', resetIdfCache);
};

on

method
elasticlunr.Index.prototype.on()

Option name Type Description
[eventName] String

The name(s) of events to bind the function to.

fn Function

The handler function to call when any of the events is emitted.

Bind a handler to events being emitted by the index.

The handler can be bound to many events at the same time.

elasticlunr.Index.prototype.on = function () {
  // Forward the complete argument list to the emitter unchanged and
  // return whatever addListener returns.
  var emitter = this.eventEmitter;
  var listenerArgs = [].slice.call(arguments);
  return emitter.addListener.apply(emitter, listenerArgs);
};

off

method
elasticlunr.Index.prototype.off()

Option name Type Description
eventName String

The name of events to remove the function from.

fn Function

The handler function to remove.

Removes a handler from an event being emitted by the index.

elasticlunr.Index.prototype.off = function (name, fn) {
  // Delegate to the emitter and hand back its result.
  var emitter = this.eventEmitter;
  var outcome = emitter.removeListener(name, fn);
  return outcome;
};

load

method
elasticlunr.Index.load()

Option name Type Description
serialisedData Object

The serialised set to load.

return elasticlunr.Index

Loads a previously serialised index.

Issues a warning if the index being imported was serialised
by a different version of elasticlunr.

elasticlunr.Index.load = function (serialisedData) {
  // A version difference only produces a warning; loading still proceeds.
  var importedVersion = serialisedData.version;
  if (importedVersion !== elasticlunr.version) {
    elasticlunr.utils.warn('version mismatch: current '
                    + elasticlunr.version + ' importing ' + importedVersion);
  }

  // `this` is the constructor that load was invoked on.
  var restored = new this();
  var serialisedIndexes = serialisedData.index;
  var fieldIndexes = {};

  restored._fields = serialisedData.fields;
  restored._ref = serialisedData.ref;
  restored.documentStore = elasticlunr.DocumentStore.load(serialisedData.documentStore);
  restored.pipeline = elasticlunr.Pipeline.load(serialisedData.pipeline);
  restored.index = fieldIndexes;

  // Rebuild one inverted index per serialised field.
  for (var fieldName in serialisedIndexes) {
    fieldIndexes[fieldName] = elasticlunr.InvertedIndex.load(serialisedIndexes[fieldName]);
  }

  return restored;
};

addField

method
elasticlunr.Index.prototype.addField()

Option name Type Description
fieldName String

The name of the field within the document that should be indexed

return elasticlunr.Index

Adds a field to the list of fields that will be searchable within documents in the index.

Remember that the inner index is built per field, which means each field has its own inverted index.

Fields should be added before any documents are added to the index, fields
that are added after documents are added to the index will only apply to new
documents added to the index.

elasticlunr.Index.prototype.addField = function (fieldName) {
  var fieldNames = this._fields;
  var fieldIndexes = this.index;

  // Register the field name, then give it an empty inverted index.
  fieldNames.push(fieldName);
  fieldIndexes[fieldName] = new elasticlunr.InvertedIndex();

  // Returning the index allows calls to be chained.
  return this;
};

setRef

method
elasticlunr.Index.prototype.setRef()

Option name Type Description
refName String

The property to use to uniquely identify the documents in the index.

emitEvent Boolean

Whether to emit add events, defaults to true

return elasticlunr.Index

Sets the property used to uniquely identify documents added to the index,
by default this property is 'id'.

This should only be changed before adding documents to the index, changing
the ref property without resetting the index can lead to unexpected results.

elasticlunr.Index.prototype.setRef = function (refName) {
  // Only the property name is recorded; documents already in the index
  // are not touched by this call.
  var index = this;
  index._ref = refName;
  return index;
};

saveDocument

method
elasticlunr.Index.prototype.saveDocument()

Option name Type Description
save Boolean

Whether to save the original JSON documents.

return elasticlunr.Index

Sets whether the original JSON documents are saved into elasticlunr.DocumentStore.

By default, all the original JSON documents are saved.

elasticlunr.Index.prototype.saveDocument = function (save) {
  // The current document store is replaced by a newly constructed one
  // that receives the `save` flag.
  var store = new elasticlunr.DocumentStore(save);
  this.documentStore = store;
  return this;
};

addDoc

method
elasticlunr.Index.prototype.addDoc()

Option name Type Description
doc Object

The JSON format document to add to the index.

emitEvent Boolean

Whether or not to emit events, default true.

Add a JSON format document to the index.

This is the way new documents enter the index, this function will run the
fields from the document through the index's pipeline and then add it to
the index, it will then show up in search results.

An 'add' event is emitted with the document that has been added and the index
the document has been added to. This event can be silenced by passing false
as the second argument to add.

/**
 * Adds a JSON document to the index.
 *
 * Each indexed field of the document is tokenized and run through the
 * pipeline; the field length is recorded in the document store and every
 * distinct token is added to that field's inverted index with a term
 * frequency equal to the square root of its occurrence count.
 *
 * @param {Object} doc The document to add. Falsy values are ignored.
 * @param {Boolean} [emitEvent] Whether to emit the 'add' event; treated as
 *   true when undefined.
 */
elasticlunr.Index.prototype.addDoc = function (doc, emitEvent) {
  if (!doc) return;
  var shouldEmit = emitEvent === undefined ? true : emitEvent;

  var docRef = doc[this._ref];

  this.documentStore.addDoc(docRef, doc);
  this._fields.forEach(function (field) {
    var fieldTokens = this.pipeline.run(elasticlunr.tokenizer(doc[field]));
    this.documentStore.addFieldLength(docRef, field, fieldTokens.length);

    // Count occurrences in a prototype-less map. With a plain `{}` a token
    // such as "constructor" or "toString" is found on Object.prototype,
    // which corrupts the count (and makes tf NaN), and "__proto__" is
    // silently dropped.
    var tokenCount = Object.create(null);
    fieldTokens.forEach(function (token) {
      tokenCount[token] = (tokenCount[token] || 0) + 1;
    });

    for (var token in tokenCount) {
      // The raw count is dampened with a square root before being stored.
      var termFrequency = Math.sqrt(tokenCount[token]);
      this.index[field].addToken(token, { ref: docRef, tf: termFrequency });
    }
  }, this);

  if (shouldEmit) this.eventEmitter.emit('add', doc, this);
};

removeDocByRef

method
elasticlunr.Index.prototype.removeDocByRef()

Option name Type Description
docRef String,Integer

The document ref to remove from the index.

emitEvent Boolean

Whether to emit remove events, defaults to true

Removes a document from the index by doc ref.

To make sure documents no longer show up in search results they can be
removed from the index using this method.

A 'remove' event is emitted with the document that has been removed and the index
the document has been removed from. This event can be silenced by passing false
as the second argument to remove.

If the DocumentStore has been configured not to store documents, removing a document by docRef is not possible and this method does nothing.

/**
 * Removes a document from the index, looked up by its ref.
 *
 * Does nothing when the ref is null/undefined, when the document store
 * reports that it does not store documents, or when the store has no
 * document for the ref.
 *
 * @param {String|Integer} docRef The ref of the document to remove.
 * @param {Boolean} [emitEvent] Whether to emit the 'remove' event; passed
 *   through to removeDoc, which treats undefined as true.
 */
elasticlunr.Index.prototype.removeDocByRef = function (docRef, emitEvent) {
  // Only a missing ref is rejected: 0 is a legitimate integer ref and must
  // not be swallowed by a truthiness check.
  if (docRef === undefined || docRef === null) return;
  if (this.documentStore.isDocStored() === false) return;
  if (!this.documentStore.hasDoc(docRef)) return;

  var doc = this.documentStore.getDoc(docRef);

  // Forward the caller's choice instead of always silencing the event, so
  // that 'remove' listeners run as documented.
  this.removeDoc(doc, emitEvent);
};

removeDoc

method
elasticlunr.Index.prototype.removeDoc()

Option name Type Description
doc Object

The document to remove from the index.

emitEvent Boolean

Whether to emit remove events, defaults to true

Removes a document from the index.
This remove operation can work even if the original doc is not stored in the DocumentStore.

To make sure documents no longer show up in search results they can be
removed from the index using this method.

A 'remove' event is emitted with the document that has been removed and the index
the document has been removed from. This event can be silenced by passing false
as the second argument to remove.

elasticlunr.Index.prototype.removeDoc = function (doc, emitEvent) {
  if (!doc) return;

  var self = this;
  var shouldEmit = true;
  if (emitEvent !== undefined) {
    shouldEmit = emitEvent;
  }

  // Nothing to do when the store has no entry for this ref.
  var ref = doc[self._ref];
  if (!self.documentStore.hasDoc(ref)) return;

  self.documentStore.removeDoc(ref);

  // Re-tokenize each indexed field of the document and remove every
  // resulting token from that field's inverted index.
  self._fields.forEach(function (fieldName) {
    var tokens = self.pipeline.run(elasticlunr.tokenizer(doc[fieldName]));
    tokens.forEach(function (tok) {
      self.index[fieldName].removeToken(tok, ref);
    });
  });

  if (shouldEmit) {
    self.eventEmitter.emit('remove', doc, self);
  }
};

updateDoc

method
elasticlunr.Index.prototype.updateDoc()

Option name Type Description
doc Object

The document to update in the index.

emitEvent Boolean

Whether to emit update events, defaults to true

Updates a document in the index.

When a document contained within the index gets updated, fields changed,
added or removed, to make sure it correctly matched against search queries,
it should be updated in the index.

This method is just a wrapper around remove and add

An 'update' event is emitted with the document that has been updated and the index.
This event can be silenced by passing false as the second argument to update. Only
an update event will be fired, the 'add' and 'remove' events of the underlying calls
are silenced.

elasticlunr.Index.prototype.updateDoc = function (doc, emitEvent) {
  var shouldEmit = true;
  if (emitEvent !== undefined) {
    shouldEmit = emitEvent;
  }

  // Remove then re-add, with both underlying events silenced so that only
  // 'update' can be emitted.
  var ref = doc[this._ref];
  this.removeDocByRef(ref, false);
  this.addDoc(doc, false);

  if (shouldEmit) {
    this.eventEmitter.emit('update', doc, this);
  }
};

getFields

method
elasticlunr.Index.prototype.getFields()

get fields of current index instance

elasticlunr.Index.prototype.getFields = function () {
  // Return a shallow copy so callers cannot modify the index's own list.
  var fieldsCopy = this._fields.slice(0);
  return fieldsCopy;
};

search

method
elasticlunr.Index.prototype.search()

Option name Type Description
query String

The query to search the index with.

userConfig JSON

The user query config, JSON format.

return Object

Searches the index using the passed query.
Queries should be a string, multiple words are allowed.

If config is null, all fields are searched by default, using an OR-based query.
If config is specified, the specified fields are searched with query-time boosting.

All query tokens are passed through the same pipeline that document tokens
are passed through, so any language processing involved will be run on every
query term.

Each query term is expanded, so that the term 'he' might be expanded to
'hello' and 'help' if those terms were already included in the index.

Matching documents are returned as an array of objects, each object contains
the matching document ref, as set for this index, and the similarity score
for this document against the query.

elasticlunr.Index.prototype.search = function (query, userConfig) {
  if (!query) return [];

  // The Configuration constructor is given the user config as a JSON
  // string, or null when no config was supplied.
  var configStr = (userConfig != null) ? JSON.stringify(userConfig) : null;
  var config = new elasticlunr.Configuration(configStr, this.getFields()).get();

  var queryTokens = this.pipeline.run(elasticlunr.tokenizer(query));

  // Sum of the boosted per-field scores, keyed by document ref.
  var totals = {};

  for (var field in config) {
    var fieldScores = this.fieldSearch(queryTokens, field, config);
    var boost = config[field].boost;

    for (var ref in fieldScores) {
      // Apply the field boost in place, then fold it into the totals.
      var boosted = fieldScores[ref] * boost;
      fieldScores[ref] = boosted;

      if (ref in totals) {
        totals[ref] += boosted;
      } else {
        totals[ref] = boosted;
      }
    }
  }

  var ranked = [];
  for (var key in totals) {
    ranked.push({ref: key, score: totals[key]});
  }

  // Highest score first.
  ranked.sort(function (left, right) {
    return right.score - left.score;
  });

  return ranked;
};

fieldSearch

method
elasticlunr.Index.prototype.fieldSearch()

Option name Type Description
queryTokens Array

The query tokens to query in this field.

field String

Field to query in.

config elasticlunr.Configuration

The user query config, JSON format.

return Object

search queryTokens in specified field.

// Scores the documents matching `queryTokens` within the single field
// `fieldName`, using that field's entry in `config` (its `bool`, `expand`
// and `boost` settings).
//
// Returns the scores keyed by docRef after coord normalisation. Returns
// undefined when the field's boost is exactly 0, and null when
// `queryTokens` is empty (no token pass ever replaces the initial null).
elasticlunr.Index.prototype.fieldSearch = function (queryTokens, fieldName, config) {
  var booleanType = config[fieldName].bool;
  var expand = config[fieldName].expand;
  var boost = config[fieldName].boost;
  // Accumulated scores across query tokens; stays null until the first
  // query token has been processed.
  var scores = null;
  // docRef -> list of original (non-expanded) query tokens found in that doc.
  var docTokens = {};

  // Do nothing if the boost is 0
  if (boost === 0) {
    return;
  }

  queryTokens.forEach(function (token) {
    var tokens = [token];
    if (expand == true) {
      tokens = this.index[fieldName].expandToken(token);
    }
    // Consider every query token in turn. If expanded, each query token
    // corresponds to a set of tokens, which is all tokens in the
    // index matching the pattern queryToken* .
    // For the set of tokens corresponding to a query token, find and score
    // all matching documents. Store those scores in queryTokenScores,
    // keyed by docRef.
    // Then, depending on the value of booleanType, combine the scores
    // for this query token with previous scores. If booleanType is OR,
    // then merge the scores by summing into the accumulated total, adding
    // new document scores as required (effectively a union operation).
    // If booleanType is AND, accumulate scores only if the document
    // has previously been scored by another query token (an intersection
    // operation).
    // Furthermore, since when booleanType is AND, additional
    // query tokens can't add new documents to the result set, use the
    // current document set to limit the processing of each new query
    // token for efficiency (i.e., incremental intersection).
    
    var queryTokenScores = {};
    tokens.forEach(function (key) {
      var docs = this.index[fieldName].getDocs(key);
      var idf = this.idf(key, fieldName);
      
      if (scores && booleanType == 'AND') {
          // Special case: we can rule out documents that have already
          // been filtered out because they weren't scored by previous
          // query token passes.
          var filteredDocs = {};
          for (var docRef in scores) {
              if (docRef in docs) {
                  filteredDocs[docRef] = docs[docRef];
              }
          }
          docs = filteredDocs;
      }
      // Only record the original query token for the retrieved documents,
      // not the expanded tokens. coordNorm only cares how many of the
      // query tokens appear in a document, so adding expanded tokens to
      // docTokens would pollute the coordNorm result.
      if (key == token) {
        this.fieldSearchStats(docTokens, key, docs);
      }

      for (var docRef in docs) {
        var tf = this.index[fieldName].getTermFrequency(key, docRef);
        var fieldLength = this.documentStore.getFieldLength(docRef, fieldName);
        // Normalise by 1 / sqrt(field length); a length of 0 leaves the
        // factor at 1, which also avoids dividing by zero.
        var fieldLengthNorm = 1;
        if (fieldLength != 0) {
          fieldLengthNorm = 1 / Math.sqrt(fieldLength);
        }

        // Expanded tokens are down-weighted. The expression simplifies to
        // (token.length / key.length) * 0.15.
        // NOTE(review): presumably key is never shorter than token because
        // it comes from expandToken(token) -- confirm.
        var penality = 1;
        if (key != token) {
          // currently I'm not sure if this penality is enough,
          // need to do verification
          penality = (1 - (key.length - token.length) / key.length) * 0.15;
        }

        var score = tf * idf * fieldLengthNorm * penality;

        // Several expanded keys can hit the same document; sum their scores.
        if (docRef in queryTokenScores) {
          queryTokenScores[docRef] += score;
        } else {
          queryTokenScores[docRef] = score;
        }
      }
    }, this);
    
    scores = this.mergeScores(scores, queryTokenScores, booleanType);
  }, this);

  scores = this.coordNorm(scores, docTokens, queryTokens.length);
  return scores;
};

mergeScores

method
elasticlunr.Index.prototype.mergeScores()

Option name Type Description
accumScores Object

accumulated scores. Should be null on first call.

scores Object

new scores to merge into accumScores.

op String

merge operation (should be 'AND' or 'OR').

Merge the scores from one set of tokens into an accumulated score table.
Exact operation depends on the op parameter. If op is 'AND', then only the
intersection of the two score lists is retained. Otherwise, the union of
the two score lists is returned. For internal use only.

elasticlunr.Index.prototype.mergeScores = function (accumScores, scores, op) {
    var ref;

    // First call: nothing accumulated yet, so the new scores are the result.
    if (!accumScores) {
        return scores;
    }

    // Anything other than 'AND' is a union: fold the new scores into the
    // accumulator in place and return that same object.
    if (op != 'AND') {
        for (ref in scores) {
            accumScores[ref] = (ref in accumScores)
                ? accumScores[ref] + scores[ref]
                : scores[ref];
        }
        return accumScores;
    }

    // 'AND': build a new table holding only refs present in both inputs.
    var shared = {};
    for (ref in scores) {
        if (ref in accumScores) {
            shared[ref] = accumScores[ref] + scores[ref];
        }
    }
    return shared;
};

fieldSearchStats

method
elasticlunr.Index.prototype.fieldSearchStats()

Option name Type Description
docTokens Object

a data structure stores which token appears in the retrieved doc.

token String

query token

docs Object

the retrieved documents of the query token

Records which query token occurs in each retrieved doc.
For internal use only.

elasticlunr.Index.prototype.fieldSearchStats = function (docTokens, token, docs) {
  for (var ref in docs) {
    // Start a token list the first time a document is seen, otherwise
    // append to the existing one.
    if (!(ref in docTokens)) {
      docTokens[ref] = [token];
      continue;
    }
    docTokens[ref].push(token);
  }
};

coordNorm

method
elasticlunr.Index.prototype.coordNorm()

Option name Type Description
scores Object

the accumulated scores, keyed by doc ref

docTokens Object

the query tokens that appear in each retrieved doc

n Integer

query token number

return Object

Applies coord normalisation to the score of each doc:
a doc that contains more of the query tokens gets a larger score than a doc
that contains fewer of them.

only for inner use.

elasticlunr.Index.prototype.coordNorm = function (scores, docTokens, n) {
  for (var ref in scores) {
    // Documents with no recorded query tokens keep their score unchanged.
    if (ref in docTokens) {
      var matched = docTokens[ref].length;
      // Scale by the fraction of the n query tokens found in the document.
      scores[ref] = scores[ref] * matched / n;
    }
  }

  return scores;
};

toJSON

method
elasticlunr.Index.prototype.toJSON()

Returns a representation of the index ready for serialisation.

elasticlunr.Index.prototype.toJSON = function () {
  var self = this;

  // Serialise the inverted index of every registered field.
  var serialisedIndexes = {};
  self._fields.forEach(function (fieldName) {
    serialisedIndexes[fieldName] = self.index[fieldName].toJSON();
  });

  var serialisedStore = self.documentStore.toJSON();
  var serialisedPipeline = self.pipeline.toJSON();

  return {
    version: elasticlunr.version,
    fields: self._fields,
    ref: self._ref,
    documentStore: serialisedStore,
    index: serialisedIndexes,
    pipeline: serialisedPipeline
  };
};

use

method
elasticlunr.Index.prototype.use()

Option name Type Description
plugin Function

The plugin to apply.

Applies a plugin to the current index.

A plugin is a function that is called with the index as its context.
Plugins can be used to customise or extend the behaviour of the index
in some way. A plugin is just a function that encapsulates the custom
behaviour that should be applied to the index.

The plugin function will be called with the index as its argument, additional
arguments can also be passed when calling use. The function will be called
with the index as its context.

Example:

var myPlugin = function (idx, arg1, arg2) {
  // `this` is the index to be extended
  // apply any extensions etc here.
}

var idx = elasticlunr(function () {
  this.use(myPlugin, 'arg1', 'arg2')
})
elasticlunr.Index.prototype.use = function (plugin) {
  // Everything after `plugin` is forwarded to it, preceded by the index.
  var extraArgs = Array.prototype.slice.call(arguments, 1);
  var pluginArgs = [this].concat(extraArgs);

  // The index is both the calling context and the first argument.
  plugin.apply(this, pluginArgs);
};