import os
import re

from parso import python_bytes_to_unicode

from jedi.debug import dbg
from jedi.file_io import KnownContentFileIO
from jedi.inference.imports import SubModuleName, load_module_from_path
from jedi.inference.filters import ParserTreeFilter
from jedi.inference.gradual.conversion import convert_names

_IGNORE_FOLDERS = ('.tox', 'venv', '__pycache__')

_OPENED_FILE_LIMIT = 2000
"""
Stats from a 2016 Lenovo Notebook running Linux:
With os.walk, it takes about 10s to scan 11'000 files (without filesystem
caching). Once cached it only takes 5s. So it is expected that reading all
those files might take a few seconds, but not a lot more.
"""
_PARSED_FILE_LIMIT = 30
"""
For now we keep the amount of parsed files really low, since parsing can
easily take 100ms for bigger files.
"""


def _resolve_names(definition_names, avoid_names=()):
    for name in definition_names:
        if name in avoid_names:
            # Avoiding recursions here, because goto on a module name lands
            # on the same module.
            continue

        if not isinstance(name, SubModuleName):
            # SubModuleNames are not actually existing names but created
            # names when importing something like `import foo.bar.baz`.
            yield name

        if name.api_type == 'module':
            for n in _resolve_names(name.goto(), definition_names):
                yield n


def _dictionarize(names):
    return dict(
        (n if n.tree_name is None else n.tree_name, n)
        for n in names
    )


def _find_defining_names(module_context, tree_name):
    found_names = _find_names(module_context, tree_name)

    for name in list(found_names):
        # Convert from/to stubs, because those might also be usages.
        found_names |= set(convert_names(
            [name],
            only_stubs=not name.get_root_context().is_stub(),
            prefer_stub_to_compiled=False
        ))

    found_names |= set(_find_global_variables(found_names, tree_name.value))
    for name in list(found_names):
        if name.api_type == 'param' or name.tree_name is None \
                or name.tree_name.parent.type == 'trailer':
            continue
        found_names |= set(_add_names_in_same_context(name.parent_context,
                                                      name.string_name))
    return set(_resolve_names(found_names))


def _find_names(module_context, tree_name):
    name = module_context.create_name(tree_name)
    found_names = set(name.goto())
    found_names.add(name)

    return set(_resolve_names(found_names))


def _add_names_in_same_context(context, string_name):
    if context.tree_node is None:
        return

    until_position = None
    while True:
        filter_ = ParserTreeFilter(
            parent_context=context,
            until_position=until_position,
        )
        names = set(filter_.get(string_name))
        if not names:
            break
        for name in names:
            yield name
        ordered = sorted(names, key=lambda x: x.start_pos)
        until_position = ordered[0].start_pos


def _find_global_variables(names, search_name):
    for name in names:
        if name.tree_name is None:
            continue
        module_context = name.get_root_context()
        try:
            method = module_context.get_global_filter
        except AttributeError:
            continue
        else:
            for global_name in method().get(search_name):
                yield global_name
                c = module_context.create_context(global_name.tree_name)
                for n in _add_names_in_same_context(c, global_name.string_name):
                    yield n


def find_references(module_context, tree_name):
    inf = module_context.inference_state
    search_name = tree_name.value

    # We disable flow analysis, because if we have ifs that are only true in
    # certain cases, we want both sides.
    try:
        inf.flow_analysis_enabled = False
        found_names = _find_defining_names(module_context, tree_name)
    finally:
        inf.flow_analysis_enabled = True

    found_names_dct = _dictionarize(found_names)

    module_contexts = set(d.get_root_context() for d in found_names)
    module_contexts = [module_context] + [m for m in module_contexts
                                          if m != module_context]
    # For param no search for other modules is necessary.
    if any(n.api_type == 'param' for n in found_names):
        potential_modules = module_contexts
    else:
        potential_modules = get_module_contexts_containing_name(
            inf,
            module_contexts,
            search_name,
        )

    non_matching_reference_maps = {}
    for module_context in potential_modules:
        for name_leaf in module_context.tree_node.get_used_names().get(search_name, []):
            new = _dictionarize(_find_names(module_context, name_leaf))
            if any(tree_name in found_names_dct for tree_name in new):
                found_names_dct.update(new)
                for tree_name in new:
                    for dct in non_matching_reference_maps.get(tree_name, []):
                        # A reference that was previously searched for matches
                        # with a now found name. Merge.
                        found_names_dct.update(dct)
                    try:
                        del non_matching_reference_maps[tree_name]
                    except KeyError:
                        pass
            else:
                for name in new:
                    non_matching_reference_maps.setdefault(name, []).append(new)
    return found_names_dct.values()


def _check_fs(inference_state, file_io, regex):
    try:
        code = file_io.read()
    except FileNotFoundError:
        return None
    code = python_bytes_to_unicode(code, errors='replace')
    if not regex.search(code):
        return None
    new_file_io = KnownContentFileIO(file_io.path, code)
    m = load_module_from_path(inference_state, new_file_io)
    if m.is_compiled():
        return None
    return m.as_context()


def gitignored_lines(folder_io, file_io):
    ignored_paths = set()
    ignored_names = set()
    for l in file_io.read().splitlines():
        if not l or l.startswith(b'#'):
            continue

        p = l.decode('utf-8', 'ignore')
        if p.startswith('/'):
            name = p[1:]
            if name.endswith(os.path.sep):
                name = name[:-1]
            ignored_paths.add(os.path.join(folder_io.path, name))
        else:
            ignored_names.add(p)
    return ignored_paths, ignored_names


def recurse_find_python_folders_and_files(folder_io, except_paths=()):
    except_paths = set(except_paths)
    for root_folder_io, folder_ios, file_ios in folder_io.walk():
        # Delete folders that we don't want to iterate over.
        for file_io in file_ios:
            path = file_io.path
            if path.endswith('.py') or path.endswith('.pyi'):
                if path not in except_paths:
                    yield None, file_io

            if path.endswith('.gitignore'):
                ignored_paths, ignored_names = \
                    gitignored_lines(root_folder_io, file_io)
                except_paths |= ignored_paths

        folder_ios[:] = [
            folder_io
            for folder_io in folder_ios
            if folder_io.path not in except_paths
            and folder_io.get_base_name() not in _IGNORE_FOLDERS
        ]
        for folder_io in folder_ios:
            yield folder_io, None


def recurse_find_python_files(folder_io, except_paths=()):
    for folder_io, file_io in recurse_find_python_folders_and_files(folder_io, except_paths):
        if file_io is not None:
            yield file_io


def _find_python_files_in_sys_path(inference_state, module_contexts):
    sys_path = inference_state.get_sys_path()
    except_paths = set()
    yielded_paths = [m.py__file__() for m in module_contexts]
    for module_context in module_contexts:
        file_io = module_context.get_value().file_io
        if file_io is None:
            continue

        folder_io = file_io.get_parent_folder()
        while True:
            path = folder_io.path
            if not any(path.startswith(p) for p in sys_path) or path in except_paths:
                break
            for file_io in recurse_find_python_files(folder_io, except_paths):
                if file_io.path not in yielded_paths:
                    yield file_io
            except_paths.add(path)
            folder_io = folder_io.get_parent_folder()


def get_module_contexts_containing_name(inference_state, module_contexts, name,
                                        limit_reduction=1):
    """
    Search a name in the directories of modules.

    :param limit_reduction: Divides the limits on opening/parsing files by
        this factor.
    """
    # Skip non-Python modules
    for module_context in module_contexts:
        if module_context.is_compiled():
            continue
        yield module_context

    # Very short names are not searched in other modules for now to avoid lots
    # of file lookups.
    if len(name) <= 2:
        return

    file_io_iterator = _find_python_files_in_sys_path(inference_state,
                                                      module_contexts)
    for x in search_in_file_ios(inference_state, file_io_iterator, name,
                                limit_reduction=limit_reduction):
        yield x  # Python 2...


def search_in_file_ios(inference_state, file_io_iterator, name,
                       limit_reduction=1):
    parse_limit = _PARSED_FILE_LIMIT / limit_reduction
    open_limit = _OPENED_FILE_LIMIT / limit_reduction
    file_io_count = 0
    parsed_file_count = 0
    regex = re.compile(r'\b' + re.escape(name) + r'\b')
    for file_io in file_io_iterator:
        file_io_count += 1
        m = _check_fs(inference_state, file_io, regex)
        if m is not None:
            parsed_file_count += 1
            yield m

        if parsed_file_count >= parse_limit:
            dbg('Hit limit of parsed files: %s', parse_limit)
            break

        if file_io_count >= open_limit:
            dbg('Hit limit of opened files: %s', open_limit)
            break
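

# Illustrative usage sketch (comments only, not part of this module): these
# helpers are normally driven by ``jedi.Script``. Assuming the file name,
# source and cursor position below are made-up inputs, a project-wide
# reference search roughly flows like this:
#
#     import jedi
#
#     source = 'def foo():\n    pass\n\nfoo()\n'
#     script = jedi.Script(source, path='example.py')
#     # Script.get_references() resolves the name under the cursor and calls
#     # find_references() above; searching other files goes through
#     # get_module_contexts_containing_name() and search_in_file_ios(),
#     # bounded by _OPENED_FILE_LIMIT and _PARSED_FILE_LIMIT.
#     for ref in script.get_references(4, 0):
#         print(ref.module_path, ref.line, ref.column)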