tokenutil.py 3.87 KB
Newer Older
Stelios Karozis's avatar
Stelios Karozis committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
"""Token-related utilities"""

# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.

from collections import namedtuple
from io import StringIO
from keyword import iskeyword

import tokenize


# Named view over the 5-tuples produced by the tokenizer, so the code below
# can refer to fields by name: ``token`` is the token type (compared against
# ``tokenize.NAME`` / ``tokenize.OP``), ``text`` is the token string,
# ``start`` / ``end`` are (line, column) pairs, and ``line`` is the source
# line text the token was read from.
Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line'])

def generate_tokens(readline):
    """Tokenize like ``tokenize.generate_tokens`` but tolerate incomplete input.

    Parameters
    ----------

    readline : callable
        Passed straight through to ``tokenize.generate_tokens``.

    Yields
    ------

    Every token the standard tokenizer produces.  If the tokenizer raises
    ``tokenize.TokenError`` (e.g. the source ends in the middle of a
    multi-line statement) the stream simply ends instead of propagating
    the error, so callers still receive all tokens seen up to that point.
    """
    token_stream = tokenize.generate_tokens(readline)
    try:
        yield from token_stream
    except tokenize.TokenError:
        # Premature EOF: stop iterating quietly rather than failing.
        pass

def line_at_cursor(cell, cursor_pos=0):
    """Return the line in a cell at a given cursor position

    Used for calling line-based APIs that don't support multi-line input, yet.

    Parameters
    ----------

    cell: str
        multiline block of text
    cursor_pos: integer
        the cursor position

    Returns
    -------

    (line, offset): (string, integer)
        The line with the current cursor (including its line terminator, if
        any), and the character offset of the start of the line.  If the
        cursor lies beyond the last line, ``line`` is the empty string and
        ``offset`` is the position just past the scanned text.
    """
    offset = 0
    # ``str.splitlines`` breaks on more than '\n' ('\r', '\f', '\v', ...).
    # Pairing every line with its terminator-stripped twin tells us reliably
    # whether the line carries a terminator; only the final line can lack one.
    terminated = cell.splitlines(True)
    stripped = cell.splitlines()
    for line, bare in zip(terminated, stripped):
        next_offset = offset + len(line)
        if len(line) == len(bare):
            # The last line has no trailing terminator: treat it as if it
            # did, so that a cursor at the very end of the text still counts
            # as being on that line.
            next_offset += 1
        if next_offset > cursor_pos:
            break
        offset = next_offset
    else:
        # Cursor is past every line (or the cell is empty).
        line = ""
    return (line, offset)

def token_at_cursor(cell, cursor_pos=0):
    """Find the name that should be inspected for a given cursor position

    Used for introspection.

    The cell is tokenized up to the cursor.  Dotted attribute accesses are
    joined into a single name, a name directly followed by ``=`` is dropped
    (the left-hand side of an assignment is not inspected), and an unclosed
    ``(`` gives priority to the name preceding it, so the callable is
    returned whenever the cursor is anywhere inside the call.

    Parameters
    ----------

    cell : unicode
        A block of Python code
    cursor_pos : int
        The location of the cursor in the block where the token should be found

    Returns
    -------

    name : str
        The innermost still-open call's name if there is one, otherwise the
        most recent name seen, otherwise the empty string.
    """
    name_stack = []     # candidate (possibly dotted) names, most recent last
    open_calls = []     # names of calls whose '(' has not been closed yet
    previous = None     # last token that was consumed, if any

    # Absolute character offset at which each (1-based) line starts.
    line_starts = {1: 0}

    for raw in generate_tokens(StringIO(cell).readline):
        current = Token(*raw)
        (first_row, first_col), (last_row, last_col) = current.start, current.end

        if (last_row + 1) not in line_starts:
            # Record where the line(s) after this token's line(s) begin.
            physical_lines = current.line.splitlines(True)
            for row, physical in enumerate(physical_lines, first_row + 1):
                if row not in line_starts:
                    line_starts[row] = line_starts[row - 1] + len(physical)

        token_begin = line_starts[first_row] + first_col
        if first_col == 0:
            # allow '|foo' to find 'foo' at the beginning of a line
            limit = cursor_pos + 1
        else:
            limit = cursor_pos
        if token_begin >= limit:
            # This token starts after the cursor: leave it unconsumed.
            break

        kind = current.token
        text = current.text
        if kind == tokenize.OP:
            if text == '=':
                if name_stack:
                    # don't inspect the lhs of an assignment
                    name_stack.pop()
            elif text == '(':
                if name_stack:
                    # entering a call: remember which name is being called
                    open_calls.append(name_stack[-1])
            elif text == ')':
                if open_calls:
                    open_calls.pop()
        elif kind == tokenize.NAME and not iskeyword(text):
            after_dot = (
                previous is not None
                and previous.token == tokenize.OP
                and previous.text == '.'
            )
            if name_stack and after_dot:
                # attribute access: extend the previous name
                name_stack[-1] = "%s.%s" % (name_stack[-1], text)
            else:
                name_stack.append(text)

        previous = current

        if line_starts[last_row] + last_col > cursor_pos:
            # The cursor sits inside this token: nothing more to read.
            break

    if open_calls:
        return open_calls[-1]
    if name_stack:
        return name_stack[-1]
    return ''