elasticlunr.Index is an object that manages a search index. It contains the indexes
and stores all the tokens and document lookups. It also provides the main
user-facing API for the library.
/**
 * Creates an empty index.
 *
 * Initial state: no fields, 'id' as the document ref property, a new
 * pipeline, document store and event emitter, an empty per-field index map
 * and an empty idf cache. A handler that replaces the idf cache with a fresh
 * empty object is registered through `on` for 'add', 'remove' and 'update'.
 *
 * @constructor
 */
elasticlunr.Index = function () {
  var self = this;

  self._fields = [];
  self._ref = 'id';
  self.pipeline = new elasticlunr.Pipeline();
  self.documentStore = new elasticlunr.DocumentStore();
  self.index = {};
  self.eventEmitter = new elasticlunr.EventEmitter();
  self._idfCache = {};

  // Any change to the indexed documents invalidates the cached idf values.
  function resetIdfCache() {
    self._idfCache = {};
  }

  self.on('add', 'remove', 'update', resetIdfCache);
};
Option name | Type | Description |
---|---|---|
[eventName] | String | The name(s) of events to bind the function to. |
fn | Function | The handler function to call when any of the named events is emitted. |
Bind a handler to events being emitted by the index.
The handler can be bound to many events at the same time.
/**
 * Binds a handler to one or more index events.
 *
 * Every argument is forwarded unchanged to the event emitter's `addListener`,
 * invoked with the emitter as its context.
 *
 * @returns {*} Whatever `addListener` returns.
 */
elasticlunr.Index.prototype.on = function () {
  var emitter = this.eventEmitter;
  var forwarded = Array.prototype.slice.call(arguments);

  return emitter.addListener.apply(emitter, forwarded);
};
Option name | Type | Description |
---|---|---|
eventName | String | The name of the event to remove the function from. |
fn | Function | The handler function to remove. |
Removes a handler from an event being emitted by the index.
/**
 * Unbinds a handler from an index event.
 *
 * @param {String} name The event name, passed through to `removeListener`.
 * @param {Function} fn The handler, passed through to `removeListener`.
 * @returns {*} Whatever the event emitter's `removeListener` returns.
 */
elasticlunr.Index.prototype.off = function (name, fn) {
  var emitter = this.eventEmitter;

  return emitter.removeListener(name, fn);
};
Option name | Type | Description |
---|---|---|
serialisedData | Object | The serialised set to load. |
return | elasticlunr.Index |
Loads a previously serialised index.
Issues a warning if the index being imported was serialised
by a different version of elasticlunr.
/**
 * Rebuilds an index from its serialised form.
 *
 * A warning is issued through `elasticlunr.utils.warn` when the serialised
 * version differs from `elasticlunr.version`; loading continues regardless.
 *
 * @param {Object} serialisedData Object with `version`, `fields`, `ref`,
 *   `documentStore`, `pipeline` and `index` properties.
 * @returns {elasticlunr.Index} An instance created with `new this`.
 */
elasticlunr.Index.load = function (serialisedData) {
  var versionsDiffer = serialisedData.version !== elasticlunr.version;

  if (versionsDiffer) {
    elasticlunr.utils.warn(
      'version mismatch: current ' + elasticlunr.version + ' importing ' + serialisedData.version
    );
  }

  var loaded = new this();

  loaded._fields = serialisedData.fields;
  loaded._ref = serialisedData.ref;
  loaded.documentStore = elasticlunr.DocumentStore.load(serialisedData.documentStore);
  loaded.pipeline = elasticlunr.Pipeline.load(serialisedData.pipeline);
  loaded.index = {};

  // One inverted index per serialised field.
  var serialisedIndexes = serialisedData.index;
  for (var fieldName in serialisedIndexes) {
    loaded.index[fieldName] = elasticlunr.InvertedIndex.load(serialisedIndexes[fieldName]);
  }

  return loaded;
};
Option name | Type | Description |
---|---|---|
fieldName | String | The name of the field within the document that should be indexed |
return | elasticlunr.Index |
Adds a field to the list of fields that will be searchable within documents in the index.
Remember that the inner index is built per field, which means each field has its own inverted index.
Fields should be added before any documents are added to the index, fields
that are added after documents are added to the index will only apply to new
documents added to the index.
/**
 * Registers a searchable field and gives it a new, empty inverted index.
 *
 * @param {String} fieldName Name of the document property to index.
 * @returns {elasticlunr.Index} This index, to allow chaining.
 */
elasticlunr.Index.prototype.addField = function (fieldName) {
  var emptyInvertedIndex = new elasticlunr.InvertedIndex();

  this._fields.push(fieldName);
  this.index[fieldName] = emptyInvertedIndex;

  return this;
};
Option name | Type | Description |
---|---|---|
refName | String | The property to use to uniquely identify the documents in the index. |
emitEvent | Boolean | Whether to emit add events, defaults to true |
return | elasticlunr.Index |
Sets the property used to uniquely identify documents added to the index,
by default this property is 'id'.
This should only be changed before adding documents to the index, changing
the ref property without resetting the index can lead to unexpected results.
/**
 * Chooses which document property is read as the document's unique ref.
 *
 * @param {String} refName Property name to use as the ref.
 * @returns {elasticlunr.Index} This index, to allow chaining.
 */
elasticlunr.Index.prototype.setRef = function (refName) {
  var index = this;

  index._ref = refName;

  return index;
};
Option name | Type | Description |
---|---|---|
save | Boolean | Whether to save the original JSON documents. |
return | elasticlunr.Index |
Sets whether the original JSON documents are saved in elasticlunr.DocumentStore.
By default, all original JSON documents are saved.
/**
 * Replaces the document store with a new one constructed with `save`.
 *
 * Note that the previous document store object is discarded.
 *
 * @param {Boolean} save Passed straight to the `elasticlunr.DocumentStore` constructor.
 * @returns {elasticlunr.Index} This index, to allow chaining.
 */
elasticlunr.Index.prototype.saveDocument = function (save) {
  var freshStore = new elasticlunr.DocumentStore(save);

  this.documentStore = freshStore;

  return this;
};
Option name | Type | Description |
---|---|---|
doc | Object | The JSON format document to add to the index. |
emitEvent | Boolean | Whether or not to emit events, default true. |
Add a JSON format document to the index.
This is the way new documents enter the index, this function will run the
fields from the document through the index's pipeline and then add it to
the index, it will then show up in search results.
An 'add' event is emitted with the document that has been added and the index
the document has been added to. This event can be silenced by passing false
as the second argument to add.
/**
 * Adds a document to the index.
 *
 * The document is handed to the document store under its ref, then every
 * registered field is tokenised, run through the pipeline and written to that
 * field's inverted index. The stored term frequency is the square root of the
 * number of times the token occurs in the field.
 *
 * @param {Object} doc The document to add; falsy values are ignored.
 * @param {Boolean} [emitEvent] Whether to emit the 'add' event; treated as
 *   true when left undefined.
 */
elasticlunr.Index.prototype.addDoc = function (doc, emitEvent) {
  if (!doc) return;

  var shouldEmit = emitEvent === undefined ? true : emitEvent;
  var docRef = doc[this._ref];

  this.documentStore.addDoc(docRef, doc);

  this._fields.forEach(function (field) {
    var fieldTokens = this.pipeline.run(elasticlunr.tokenizer(doc[field]));
    this.documentStore.addFieldLength(docRef, field, fieldTokens.length);

    // A prototype-less map is required here: with a plain {} the `in` test
    // also matches inherited names, so tokens such as "constructor" or
    // "toString" would be "incremented" from a function value and end up
    // with a NaN term frequency.
    var tokenCount = Object.create(null);
    fieldTokens.forEach(function (token) {
      if (token in tokenCount) {
        tokenCount[token] += 1;
      } else {
        tokenCount[token] = 1;
      }
    });

    for (var token in tokenCount) {
      var termFrequency = Math.sqrt(tokenCount[token]);
      this.index[field].addToken(token, { ref: docRef, tf: termFrequency });
    }
  }, this);

  if (shouldEmit) this.eventEmitter.emit('add', doc, this);
};
Option name | Type | Description |
---|---|---|
docRef | String,Integer | The document ref to remove from the index. |
emitEvent | Boolean | Whether to emit remove events, defaults to true |
Removes a document from the index by doc ref.
To make sure documents no longer show up in search results they can be
removed from the index using this method.
A 'remove' event is emitted with the document that has been removed and the index
the document has been removed from. This event can be silenced by passing false
as the second argument to remove.
If the DocumentStore has been configured not to store documents, removing a document by docRef is not possible and this method does nothing.
/**
 * Removes a document from the index, looked up by its ref.
 *
 * Nothing happens when the ref is missing, when the document store reports
 * that documents are not stored, or when the store has no document for the
 * ref. Otherwise the stored document is fetched and passed to `removeDoc`.
 *
 * @param {String|Integer} docRef The ref of the document to remove.
 * @param {Boolean} [emitEvent] Forwarded to `removeDoc`, which treats
 *   undefined as true, so the 'remove' event is emitted by default.
 */
elasticlunr.Index.prototype.removeDocByRef = function (docRef, emitEvent) {
  // Reject only a missing ref: a plain falsy test would also discard the
  // integer ref 0, which is a legitimate document identifier.
  if (docRef === undefined || docRef === null) return;

  if (this.documentStore.isDocStored() === false) return;
  if (!this.documentStore.hasDoc(docRef)) return;

  var doc = this.documentStore.getDoc(docRef);

  // Honour the caller's choice instead of always silencing the event, so that
  // 'remove' listeners (including the idf cache reset) run as documented.
  this.removeDoc(doc, emitEvent);
};
Option name | Type | Description |
---|---|---|
doc | Object | The document to remove from the index. |
emitEvent | Boolean | Whether to emit remove events, defaults to true |
Removes a document from the index.
This remove operation can work even if the original doc is not stored in the DocumentStore.
To make sure documents no longer show up in search results they can be
removed from the index using this method.
A 'remove' event is emitted with the document that has been removed and the index
the document has been removed from. This event can be silenced by passing false
as the second argument to remove.
/**
 * Removes a document from the index.
 *
 * The call is a no-op for a falsy document or when the document store does
 * not know the document's ref. Otherwise the ref is removed from the store
 * and, for every registered field, the field is tokenised again and each
 * resulting token is removed from that field's inverted index.
 *
 * @param {Object} doc The document to remove.
 * @param {Boolean} [emitEvent] Whether to emit the 'remove' event; treated
 *   as true when left undefined.
 */
elasticlunr.Index.prototype.removeDoc = function (doc, emitEvent) {
  if (!doc) return;

  var self = this;
  var shouldEmit = emitEvent !== undefined ? emitEvent : true;
  var docRef = doc[self._ref];

  if (!self.documentStore.hasDoc(docRef)) return;

  self.documentStore.removeDoc(docRef);

  self._fields.forEach(function (field) {
    var fieldTokens = self.pipeline.run(elasticlunr.tokenizer(doc[field]));

    fieldTokens.forEach(function (token) {
      self.index[field].removeToken(token, docRef);
    });
  });

  if (shouldEmit) {
    self.eventEmitter.emit('remove', doc, self);
  }
};
Option name | Type | Description |
---|---|---|
doc | Object | The document to update in the index. |
emitEvent | Boolean | Whether to emit update events, defaults to true |
Updates a document in the index.
When a document contained within the index gets updated, fields changed,
added or removed, to make sure it correctly matched against search queries,
it should be updated in the index.
This method is just a wrapper around remove
and add
An 'update' event is emitted with the document that has been updated and the index.
This event can be silenced by passing false as the second argument to update. Only
an update event will be fired, the 'add' and 'remove' events of the underlying calls
are silenced.
/**
 * Updates a document by removing it (by ref) and adding it again.
 *
 * Both underlying calls are made with events silenced; a single 'update'
 * event is emitted afterwards unless suppressed.
 *
 * @param {Object} doc The new version of the document; falsy values are
 *   ignored, matching `addDoc` and `removeDoc`.
 * @param {Boolean} [emitEvent] Whether to emit the 'update' event; treated
 *   as true when left undefined.
 */
elasticlunr.Index.prototype.updateDoc = function (doc, emitEvent) {
  // Without this guard a null/undefined doc throws on `doc[this._ref]`,
  // whereas the other document mutators simply ignore it.
  if (!doc) return;

  var shouldEmit = emitEvent === undefined ? true : emitEvent;

  this.removeDocByRef(doc[this._ref], false);
  this.addDoc(doc, false);

  if (shouldEmit) this.eventEmitter.emit('update', doc, this);
};
get fields of current index instance
/**
 * Returns the names of the fields registered on this index.
 *
 * @returns {Array} A shallow copy of the field list, so callers cannot
 *   modify the index's own array through it.
 */
elasticlunr.Index.prototype.getFields = function () {
  var fieldsCopy = this._fields.slice(0);

  return fieldsCopy;
};
Option name | Type | Description |
---|---|---|
query | String | The query to search the index with. |
userConfig | JSON | The user query config, JSON format. |
return | Object |
Searches the index using the passed query.
Queries should be a string, multiple words are allowed.
If config is null, all fields are searched by default, using an OR based query.
If config is specified, only the specified fields are searched, with query-time boosting.
All query tokens are passed through the same pipeline that document tokens
are passed through, so any language processing involved will be run on every
query term.
Each query term is expanded, so that the term 'he' might be expanded to
'hello' and 'help' if those terms were already included in the index.
Matching documents are returned as an array of objects, each object contains
the matching document ref, as set for this index, and the similarity score
for this document against the query.
/**
 * Searches the index.
 *
 * The query is tokenised and run through the pipeline, each field named in
 * the resolved configuration is searched with `fieldSearch`, the per-field
 * scores are multiplied by that field's boost and summed per document ref.
 *
 * @param {String} query The query; a falsy query yields an empty array.
 * @param {Object} [userConfig] Query configuration; when not null/undefined
 *   it is JSON-stringified and handed to `elasticlunr.Configuration`.
 * @returns {Array} Objects of the form `{ref, score}`, highest score first.
 */
elasticlunr.Index.prototype.search = function (query, userConfig) {
  if (!query) return [];

  var configStr = userConfig != null ? JSON.stringify(userConfig) : null;
  var config = new elasticlunr.Configuration(configStr, this.getFields()).get();
  var queryTokens = this.pipeline.run(elasticlunr.tokenizer(query));

  var totals = {};
  var docRef;

  for (var field in config) {
    var fieldScores = this.fieldSearch(queryTokens, field, config);
    var fieldBoost = config[field].boost;

    for (docRef in fieldScores) {
      // The boosted value is written back so the object returned by
      // fieldSearch ends up holding boosted scores, as before.
      var boosted = fieldScores[docRef] * fieldBoost;
      fieldScores[docRef] = boosted;

      if (docRef in totals) {
        totals[docRef] += boosted;
      } else {
        totals[docRef] = boosted;
      }
    }
  }

  var results = [];
  for (docRef in totals) {
    results.push({ref: docRef, score: totals[docRef]});
  }

  function byDescendingScore(a, b) {
    return b.score - a.score;
  }

  results.sort(byDescendingScore);

  return results;
};
Option name | Type | Description |
---|---|---|
queryTokens | Array | The query tokens to query in this field. |
fieldName | String | Field to query in. |
config | elasticlunr.Configuration | The user query config, JSON format. |
return | Object |
search queryTokens in specified field.
/**
 * Scores documents against the query tokens within a single field.
 *
 * Reads `bool`, `expand` and `boost` from `config[fieldName]`. For each query
 * token the matching documents are scored as
 * `tf * idf * fieldLengthNorm * penality`, the per-token scores are folded
 * into the running total with `mergeScores`, and the final table is passed
 * through `coordNorm` together with the number of query tokens.
 *
 * NOTE(review): `this.idf` is called here but is not defined in this section
 * of the file.
 *
 * @param {Array} queryTokens Query tokens, already run through the pipeline by the caller.
 * @param {String} fieldName The field to search in.
 * @param {Object} config Per-field configuration, keyed by field name.
 * @returns {Object|undefined} Whatever `coordNorm` returns for the merged
 *   score table (keyed by doc ref). When the field's boost is exactly 0 the
 *   function returns undefined instead. `scores` is still null when
 *   `queryTokens` is empty, so in that case `coordNorm` receives null.
 */
elasticlunr.Index.prototype.fieldSearch = function (queryTokens, fieldName, config) {
var booleanType = config[fieldName].bool;
var expand = config[fieldName].expand;
var boost = config[fieldName].boost;
// Accumulated scores across query tokens; null until the first token has been merged.
var scores = null;
// docRef -> list of original (non-expanded) query tokens found in that document.
var docTokens = {};
// Do nothing if the boost is 0 (note: this returns undefined, not an empty object).
if (boost === 0) {
return;
}
queryTokens.forEach(function (token) {
var tokens = [token];
if (expand == true) {
tokens = this.index[fieldName].expandToken(token);
}
// Consider every query token in turn. If expanded, each query token
// corresponds to a set of tokens, which is all tokens in the
// index matching the pattern queryToken* .
// For the set of tokens corresponding to a query token, find and score
// all matching documents. Store those scores in queryTokenScores,
// keyed by docRef.
// Then, depending on the value of booleanType, combine the scores
// for this query token with previous scores. If booleanType is OR,
// then merge the scores by summing into the accumulated total, adding
// new document scores as required (effectively a union operator).
// If booleanType is AND, accumulate scores only if the document
// has previously been scored by another query token (an intersection
// operation).
// Furthermore, since when booleanType is AND, additional
// query tokens can't add new documents to the result set, use the
// current document set to limit the processing of each new query
// token for efficiency (i.e., incremental intersection).
var queryTokenScores = {};
tokens.forEach(function (key) {
var docs = this.index[fieldName].getDocs(key);
var idf = this.idf(key, fieldName);
if (scores && booleanType == 'AND') {
// Special case: we can rule out documents that have
// already been filtered out because they weren't scored
// by previous query token passes.
var filteredDocs = {};
for (var docRef in scores) {
if (docRef in docs) {
filteredDocs[docRef] = docs[docRef];
}
}
docs = filteredDocs;
}
// Only record the appearing token for retrieved documents for the
// original token, not for an expanded token.
// Because coordNorm for a retrieved document only cares how many
// query tokens appear in that document, expanded tokens must not be
// added into docTokens; if added, they would pollute the
// coordNorm.
if (key == token) {
this.fieldSearchStats(docTokens, key, docs);
}
for (var docRef in docs) {
var tf = this.index[fieldName].getTermFrequency(key, docRef);
var fieldLength = this.documentStore.getFieldLength(docRef, fieldName);
// Normalise by 1 / sqrt(field length); a zero length leaves the norm at 1.
var fieldLengthNorm = 1;
if (fieldLength != 0) {
fieldLengthNorm = 1 / Math.sqrt(fieldLength);
}
// Exact matches keep full weight; expanded keys are scaled down by
// (token.length / key.length) * 0.15.
var penality = 1;
if (key != token) {
// Currently I'm not sure if this penalty is enough,
// need to do verification.
penality = (1 - (key.length - token.length) / key.length) * 0.15;
}
var score = tf * idf * fieldLengthNorm * penality;
if (docRef in queryTokenScores) {
queryTokenScores[docRef] += score;
} else {
queryTokenScores[docRef] = score;
}
}
}, this);
scores = this.mergeScores(scores, queryTokenScores, booleanType);
}, this);
scores = this.coordNorm(scores, docTokens, queryTokens.length);
return scores;
};
Option name | Type | Description |
---|---|---|
accumScores | Object | accumulated scores. Should be null on first call. |
scores | Object | new scores to merge into accumScores. |
op | String | merge operation (should be 'AND' or 'OR'). |
Merge the scores from one set of tokens into an accumulated score table.
Exact operation depends on the op parameter. If op is 'AND', then only the
intersection of the two score lists is retained. Otherwise, the union of
the two score lists is returned. For internal use only.
/**
 * Folds one score table into an accumulated score table.
 *
 * With no accumulated table yet, `scores` itself is returned. For 'AND' a new
 * object is returned that holds the summed score of every doc ref present in
 * both tables. For any other `op`, `accumScores` is updated in place (summing
 * shared refs, adding new ones) and returned.
 *
 * @param {Object} accumScores Accumulated scores, or a falsy value on the first call.
 * @param {Object} scores New scores keyed by doc ref.
 * @param {String} op 'AND' for intersection; anything else means union.
 * @returns {Object} The merged score table.
 */
elasticlunr.Index.prototype.mergeScores = function (accumScores, scores, op) {
  var docRef;

  if (!accumScores) {
    return scores;
  }

  if (op != 'AND') {
    // Union: accumulate directly into the existing table.
    for (docRef in scores) {
      if (docRef in accumScores) {
        accumScores[docRef] += scores[docRef];
      } else {
        accumScores[docRef] = scores[docRef];
      }
    }

    return accumScores;
  }

  // Intersection: keep only refs seen in both tables.
  var intersection = {};
  for (docRef in scores) {
    if (docRef in accumScores) {
      intersection[docRef] = accumScores[docRef] + scores[docRef];
    }
  }

  return intersection;
};
Option name | Type | Description |
---|---|---|
docTokens | Object | a data structure stores which token appears in the retrieved doc. |
token | String | query token |
docs | Object | the retrieved documents of the query token |
Records that the query token occurs in each retrieved doc, keyed by doc ref.
For internal use only.
/**
 * Notes, for every retrieved document, that `token` was found in it.
 *
 * `docTokens` is modified in place: the token is appended to the list kept
 * under each doc ref in `docs`, creating the list when the ref is new.
 *
 * @param {Object} docTokens Map of doc ref to the list of tokens recorded so far.
 * @param {String} token The query token.
 * @param {Object} docs The documents retrieved for the token, keyed by doc ref.
 */
elasticlunr.Index.prototype.fieldSearchStats = function (docTokens, token, docs) {
  for (var docRef in docs) {
    if (!(docRef in docTokens)) {
      docTokens[docRef] = [token];
      continue;
    }

    docTokens[docRef].push(token);
  }
};
Option name | Type | Description |
---|---|---|
scores | Object | accumulated scores of the retrieved docs, keyed by doc ref |
docTokens | Object | the query tokens recorded for each retrieved doc |
n | Integer | number of query tokens |
return | Object |
Applies coordination normalisation to the score of each doc.
If a doc contains more of the query tokens, its score will be larger than that
of a doc which contains fewer query tokens.
For internal use only.
/**
 * Scales each score by the fraction of query tokens recorded for that doc.
 *
 * `scores` is modified in place; refs that have no entry in `docTokens` are
 * left untouched.
 *
 * @param {Object} scores Scores keyed by doc ref.
 * @param {Object} docTokens Map of doc ref to the list of query tokens recorded for it.
 * @param {Number} n Number of query tokens.
 * @returns {Object} The same `scores` value that was passed in.
 */
elasticlunr.Index.prototype.coordNorm = function (scores, docTokens, n) {
  for (var docRef in scores) {
    if (docRef in docTokens) {
      var matchedTokens = docTokens[docRef].length;
      scores[docRef] = scores[docRef] * matchedTokens / n;
    }
  }

  return scores;
};
Returns a representation of the index ready for serialisation.
/**
 * Builds a plain-object representation of the index for serialisation.
 *
 * @returns {Object} Object with `version`, `fields`, `ref`, `documentStore`,
 *   `index` (one serialised inverted index per registered field) and `pipeline`.
 */
elasticlunr.Index.prototype.toJSON = function () {
  var self = this;
  var serialisedIndexes = {};

  self._fields.forEach(function (field) {
    serialisedIndexes[field] = self.index[field].toJSON();
  });

  return {
    version: elasticlunr.version,
    fields: self._fields,
    ref: self._ref,
    documentStore: self.documentStore.toJSON(),
    index: serialisedIndexes,
    pipeline: self.pipeline.toJSON()
  };
};
Option name | Type | Description |
---|---|---|
plugin | Function | The plugin to apply. |
Applies a plugin to the current index.
A plugin is a function that is called with the index as its context.
Plugins can be used to customise or extend the behaviour the index
in some way. A plugin is just a function, that encapsulated the custom
behaviour that should be applied to the index.
The plugin function will be called with the index as its argument, additional
arguments can also be passed when calling use. The function will be called
with the index as its context.
Example:
var myPlugin = function (idx, arg1, arg2) {
// `this` is the index to be extended
// apply any extensions etc here.
}
var idx = elasticlunr(function () {
this.use(myPlugin, 'arg1', 'arg2')
})
/**
 * Applies a plugin to this index.
 *
 * The plugin is invoked with the index both as `this` and as its first
 * argument; any extra arguments given to `use` follow it.
 *
 * @param {Function} plugin The plugin function to run.
 */
elasticlunr.Index.prototype.use = function (plugin) {
  var extraArgs = Array.prototype.slice.call(arguments, 1);
  var pluginArgs = [this].concat(extraArgs);

  plugin.apply(this, pluginArgs);
};