def _tokenize()

in cstar/nodetoolparser/describering.py


def _tokenize(original_line, offset=0):
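    # Recursively split the line into a flat tuple of Symbol and Identifier
    # tokens, tracking each token's character offset within original_line.
    # Symbol, Identifier, ParseException and ipv6_addr_re are resolved from
    # the enclosing module's namespace.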
    line = original_line.lstrip()
    offset += len(original_line) - len(line)
    if not line:
        return ()
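    # Single punctuation characters become Symbol tokens.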
    if line[0] in {'(', ')', '[', ']', ',', ':'}:
        return (Symbol(line[0], offset),) + _tokenize(line[1:], offset + 1)

    # Handle an IPv6 address; its ':' characters would otherwise be
    # split off as separator Symbols.
    match = ipv6_addr_re.search(line)
    if match:
        tok = match.group()
        start, end = match.span()
        identifier = Identifier(tok, offset + start)
        return (identifier,) + _tokenize(line[end:], offset + end)

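    # Consume an identifier or numeric token one character at a time;
    # '.', '_' and '-' may also appear inside the token.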
    if line[0].isalnum() or line[0] == '-':
        tok = ""
        while True:
            tok += line[0]
            line = line[1:]
            if not line:
                break
            if not (line[0].isalnum() or line[0] in {'.', '_', '-'}):
                break

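        # Plain or negative integers become int-valued Identifiers;
        # everything else keeps its string value.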
        if tok.isnumeric() or (tok[0] == '-' and tok[1:].isnumeric()):
            identifier = Identifier(int(tok), offset)
        else:
            identifier = Identifier(tok, offset)
        return (identifier,) + _tokenize(line, offset + len(tok))
    raise ParseException(line, offset, "Could not parse string")
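
For context, here is a minimal sketch of how the tokenizer could be exercised. Symbol, Identifier, ParseException and ipv6_addr_re are not shown in this excerpt, so the stand-ins below are simplified assumptions (the regex in particular is a loose placeholder anchored to the start of the string, not the module's real pattern); pasted into the same module as _tokenize, the sketch should run as written.

import re
from collections import namedtuple

# Hypothetical stand-ins for names defined elsewhere in describering.py.
Symbol = namedtuple("Symbol", ["name", "offset"])
Identifier = namedtuple("Identifier", ["name", "offset"])
ipv6_addr_re = re.compile(r"^[0-9a-fA-F:]*::[0-9a-fA-F:]+")  # loose placeholder


class ParseException(Exception):
    def __init__(self, line, offset, message):
        super().__init__("%s at offset %d: %r" % (message, offset, line))


tokens = _tokenize("rack1 [10, -3, fe80::1]")
for token in tokens:
    print(type(token).__name__, token.name, token.offset)
# Expected output with these stand-ins:
#   Identifier rack1 0
#   Symbol [ 6
#   Identifier 10 7
#   Symbol , 9
#   Identifier -3 11
#   Symbol , 13
#   Identifier fe80::1 15
#   Symbol ] 22

The printed offsets are positions in the original input string, which is what the offset parameter threads through the recursion.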