elasticlunr

tokenizer

method
elasticlunr.tokenizer()

Option name Type Description
str String|Array

The string, or array of values, that you want to tokenize.

return Array

A function for splitting a string into tokens.
Currently, English is supported by default.
Strings are split using elasticlunr.tokenizer.seperator; change the value of
that property to control how strings are split into tokens.
IMPORTANT: change elasticlunr.tokenizer.seperator with care. If you are not familiar with
text processing, it is best to leave it unchanged.

/**
 * Split a string, or every entry of an array, into lower-cased tokens.
 *
 * Input is cut on `elasticlunr.tokenizer.seperator`. For array input the
 * `null`/`undefined` entries are dropped; every remaining entry is converted
 * with `elasticlunr.utils.toString`, trimmed, lower-cased and split, and the
 * pieces are returned as one flat array in input order.
 *
 * @param {String|Array} str The string, or array of values, to tokenize.
 * @return {Array} The tokens; an empty array when called without an argument
 *   or with `null`/`undefined`.
 */
elasticlunr.tokenizer = function (str) {
  if (!arguments.length || str === null || str === undefined) return [];

  var separator = elasticlunr.tokenizer.seperator;

  if (Array.isArray(str)) {
    var out = [];

    str.forEach(function (item) {
      if (item === null || item === undefined) return;

      // Trim each entry, exactly as the plain-string path below does, so that
      // (with the default separator) padding around an entry does not turn
      // into empty-string tokens.
      var tokens = elasticlunr.utils.toString(item).trim().toLowerCase().split(separator);

      // Append in place instead of concat-ing, which would copy the whole
      // accumulator once per entry.
      for (var i = 0; i < tokens.length; i++) {
        out.push(tokens[i]);
      }
    });

    return out;
  }

  return str.toString().trim().toLowerCase().split(separator);
};

defaultSeperator

property
elasticlunr.tokenizer.defaultSeperator

Default string separator.

/**
 * Default separator: a regular expression matching one or more consecutive
 * whitespace or hyphen characters.
 */
elasticlunr.tokenizer.defaultSeperator = /[\s\-]+/;

seperator

property
elasticlunr.tokenizer.seperator

The separator used to split a string into tokens. Override this property to change the behaviour of
elasticlunr.tokenizer when tokenizing strings. By default this splits on whitespace and hyphens.

/**
 * Separator currently used by elasticlunr.tokenizer; starts out as the
 * default separator.
 */
elasticlunr.tokenizer.seperator = elasticlunr.tokenizer.defaultSeperator;

setSeperator

method
elasticlunr.tokenizer.setSeperator()

Option name Type Description
sep Object

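The customized separator that you want to use to tokenize a string.

Set up a customized string separator. Only non-null values of type object (for example a regular expression) are accepted; any other value is ignored.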

/**
 * Install a custom separator for elasticlunr.tokenizer.
 *
 * Only non-null values whose `typeof` is 'object' are accepted; anything
 * else is ignored and the current separator is kept.
 *
 * @param {Object} sep The separator to split strings with.
 */
elasticlunr.tokenizer.setSeperator = function(sep) {
    var isUsable = typeof sep === 'object' && sep !== null;

    if (!isUsable) {
        return;
    }

    elasticlunr.tokenizer.seperator = sep;
};

resetSeperator

method
elasticlunr.tokenizer.resetSeperator()

Reset the string separator to the default separator.

/**
 * Restore the tokenizer's separator to
 * `elasticlunr.tokenizer.defaultSeperator`.
 */
elasticlunr.tokenizer.resetSeperator = function() {
    var tokenizer = elasticlunr.tokenizer;

    tokenizer.seperator = tokenizer.defaultSeperator;
};

getSeperator

method
elasticlunr.tokenizer.getSeperator()

Get the string separator currently in use.

/**
 * Read the separator that elasticlunr.tokenizer currently splits on.
 *
 * @return {Object} The value of `elasticlunr.tokenizer.seperator`.
 */
elasticlunr.tokenizer.getSeperator = function() {
    var current = elasticlunr.tokenizer.seperator;

    return current;
};