private List extractHashtagsWithIndices()

in java/src/main/java/com/twitter/twittertext/Extractor.java [450:498]


  /**
   * Extracts hashtag entities from the given text.
   *
   * <p>Every match of {@code Regex.VALID_HASHTAG} is a candidate; a candidate is dropped when
   * the text immediately following it matches {@code Regex.INVALID_HASHTAG_MATCH_END}.
   *
   * @param text the text to scan; when {@code isEmptyString(text)} holds an empty list is returned
   * @param checkUrlOverlap if {@code true}, URLs are extracted from the same text as well and
   *     handed, together with the hashtags, to {@code removeOverlappingEntities}; only entities
   *     of type {@code HASHTAG} that remain afterwards are returned
   * @return the hashtag entities found, or an empty list when there are none
   */
  private List<Entity> extractHashtagsWithIndices(String text, boolean checkUrlOverlap) {
    if (isEmptyString(text)) {
      return Collections.emptyList();
    }

    // Performance optimization.
    // A hashtag must start with either the ASCII number sign '#' or the
    // fullwidth number sign (U+FF03). If the text contains neither, there is
    // nothing to extract and the regex pass can be skipped entirely.
    if (text.indexOf('#') < 0 && text.indexOf('\uFF03') < 0) {
      return Collections.emptyList();
    }

    List<Entity> extracted = new ArrayList<Entity>();
    Matcher matcher = Regex.VALID_HASHTAG.matcher(text);

    while (matcher.find()) {
      // Inspect what comes right after the candidate: a match of
      // INVALID_HASHTAG_MATCH_END there disqualifies the candidate.
      String after = text.substring(matcher.end());
      if (!Regex.INVALID_HASHTAG_MATCH_END.matcher(after).find()) {
        extracted.add(new Entity(matcher, Entity.Type.HASHTAG, Regex.VALID_HASHTAG_GROUP_TAG));
      }
    }

    if (checkUrlOverlap) {
      // Extract URLs so overlap between hashtags and URLs can be resolved.
      List<Entity> urls = extractURLsWithIndices(text);
      if (!urls.isEmpty()) {
        extracted.addAll(urls);
        // Resolve overlaps across the combined hashtag + URL list.
        removeOverlappingEntities(extracted);
        // The URLs were only added for overlap resolution; keep hashtags only.
        Iterator<Entity> it = extracted.iterator();
        while (it.hasNext()) {
          Entity entity = it.next();
          if (entity.getType() != Entity.Type.HASHTAG) {
            it.remove();
          }
        }
      }
    }

    return extracted;
  }