src/models/tokenize.js (16 lines of code) (raw):

// Tweet-style tokenizer pattern (URLs, @mentions, #hashtags, abbreviations,
// numbers, words), modeled on the classic NLTK regexp-tokenizer example.
//
// Alternation order matters: the regex engine takes the FIRST alternative
// that matches, so the more specific token shapes must precede the generic
// word alternative `\w+(-\w+)*`. Order used here:
//   1. URL variants (t.co / vine.co shorteners, then generic http://word)
//   2. @mention, #hashtag
//   3. Abbreviations like "U.S.A." — must beat `\w+`, which would split it
//   4. Comma-grouped numbers like "1,000" — must beat the plain-number case
//   5. Currency/decimal/percent like "$12.50", "3.5%" — must beat `\w+`,
//      which would otherwise split "3.5%" into "3" and "5"
//   6. Hyphenated or plain words
const PATTERN = /http:\/\/t\.co\/\w+|http:\/\/vine\.co\/\w+|http:\/\/t\.co\w+|http:\/\/vine\.co\w+|http:\/\/t\.\w+|http:\/\/vine\.\w+|http:\/\/\w+|@\w+|#\w+|([A-Za-z]\.)+|\d+(,\d+)+|\$?\d+(\.\d+)?%?|\w+(-\w+)*/g;

/**
 * Tokenize tweet-like text into URLs, @mentions, #hashtags, abbreviations,
 * numbers, and words, in order of appearance. Characters that match no
 * alternative (stray punctuation, whitespace) are silently skipped.
 *
 * @param {string} text - Input text to tokenize.
 * @returns {string[]} The matched tokens; empty array when nothing matches.
 */
export function tokenize(text) {
  // String.prototype.match with a /g regex returns all full-match strings
  // (or null) and resets lastIndex itself, so no manual exec loop or
  // lastIndex bookkeeping is needed.
  return text.match(PATTERN) ?? [];
}

/**
 * Split text on single ASCII space characters, dropping empty pieces
 * (so runs of spaces do not produce empty tokens). Note: tabs and
 * newlines are NOT treated as separators.
 *
 * @param {string} text - Input text to split.
 * @returns {string[]} Non-empty space-separated segments.
 */
export function tokenizeBySpace(text) {
  return text.split(' ').filter((piece) => piece.length > 0);
}