in java/src/main/java/com/twitter/twittertext/Extractor.java [150:174]
/**
 * Removes entities that overlap an earlier entity, modifying {@code entities} in place.
 *
 * <p>The list is first sorted by ascending start index. It is then scanned once, and every
 * entity whose start index lies before the end index of the most recently kept entity is
 * removed. Of two overlapping entities, the one that sorts first (smaller start index)
 * therefore survives. Entities with equal start indices keep their incoming relative order,
 * because {@code Collections.sort} is stable.
 *
 * @param entities the entities to sort and filter; must be non-null, and the list must
 *     support both sorting and {@code Iterator.remove()}
 */
private void removeOverlappingEntities(List<Entity> entities) {
  // Sort by start index. The comparison is spelled out instead of returning
  // "e1.start - e2.start": subtraction can overflow for operands of opposite sign,
  // which would flip the result and violate the Comparator contract.
  Collections.sort(entities, new Comparator<Entity>() {
    @Override
    public int compare(Entity e1, Entity e2) {
      int start1 = e1.start;
      int start2 = e2.start;
      return (start1 < start2) ? -1 : ((start1 > start2) ? 1 : 0);
    }
  });

  // Nothing can overlap in an empty list, and it.next() below requires an element.
  if (entities.isEmpty()) {
    return;
  }

  // Remove overlapping entities.
  // Two entities overlap only when one is URL and the other is hashtag/mention
  // which is a part of the URL. When it happens, we choose URL over hashtag/mention
  // by selecting the one with smaller start index.
  Iterator<Entity> it = entities.iterator();
  Entity prev = it.next();
  while (it.hasNext()) {
    Entity cur = it.next();
    if (prev.getEnd() > cur.getStart()) {
      // cur begins before prev has ended: drop cur and keep comparing against prev.
      it.remove();
    } else {
      prev = cur;
    }
  }
}