in java/src/main/java/com/twitter/twittertext/Extractor.java [384:411]
/**
 * Verifies that a URL's host can be converted to its ASCII (punycode) form and that the URL,
 * measured with the converted host, does not exceed {@code MAX_URL_LENGTH}.
 *
 * @param originalUrlLength length of the URL as it was originally written
 * @param protocol the URL's protocol, or {@code null} when the URL has none; a {@code null}
 *     protocol causes {@code URL_GROUP_PROTOCOL_LENGTH} to be added to the measured length
 * @param originalHost the host as it was originally written; may be {@code null}
 * @return {@code false} when {@code isEmptyString} rejects the host, when the IDN conversion
 *     fails or yields an empty string, or when the adjusted length exceeds
 *     {@code MAX_URL_LENGTH}; {@code true} otherwise
 */
public static boolean isValidHostAndLength(int originalUrlLength, @Nullable String protocol,
    @Nullable String originalHost) {
  if (isEmptyString(originalHost)) {
    return false;
  }

  final String asciiHost;
  try {
    // Every host goes through IDN; an all-ASCII host comes back unchanged. As a side benefit
    // the conversion enforces the 1..63 character length check.
    asciiHost = IDN.toASCII(originalHost, IDN.ALLOW_UNASSIGNED);
  } catch (IllegalArgumentException | IndexOutOfBoundsException ignored) {
    // IndexOutOfBoundsException is caught as well because toASCII can throw it, rather than
    // the documented IllegalArgumentException, for domain names longer than 256 characters.
    return false;
  }

  if (asciiHost.isEmpty()) {
    return false;
  }

  // Punycode encoding may change the host's length; shift the URL length by the difference.
  final int hostLengthDelta = asciiHost.length() - originalHost.length();
  // A URL without a protocol is charged for one so that it cannot slip past the limit.
  final int protocolAllowance = (protocol == null) ? URL_GROUP_PROTOCOL_LENGTH : 0;

  return originalUrlLength + hostLengthDelta + protocolAllowance <= MAX_URL_LENGTH;
}