planetp planetp - 3 months ago 7x
Linux Question

How to print signatures of all functions/methods in a Python project?

I want to extract all Python functions/methods with their signatures from a Python project. I've tried:

$ grep -r ^def *

but this doesn't show full signatures when parameters span several lines. Any suggestions?


You can tokenize the file and use that to print function definitions:

import token
from tokenize import generate_tokens

def find_definitions(filename):
    """Yield the full text of every ``def`` header in *filename*.

    Tokenizes the file so that signatures spanning several physical
    lines are reassembled; each yielded string runs from the ``def``
    keyword through the line containing the closing colon.
    """
    with open(filename) as f:
        gen = generate_tokens(f.readline)
        for tok in gen:
            if tok[0] == token.NAME and tok[1] == 'def':
                # function definition, read until next colon.
                # tok[-1] is the physical source line, tok[3][0] the end row.
                definition, last_line = [tok[-1]], tok[3][0]
                while not (tok[0] == token.OP and tok[1] == ':'):
                    if last_line != tok[3][0]:
                        # signature continues on a new line; collect it
                        definition.append(tok[-1])
                        last_line = tok[3][0]
                    tok = next(gen)
                if last_line != tok[3][0]:
                    # the colon's line was never collected inside the loop
                    definition.append(tok[-1])
                yield ''.join(definition)

This works regardless of how many lines a function definition uses.


>>> import textwrap
>>> gen = find_definitions(textwrap.__file__.rstrip('c'))
>>> for definition in gen:
...     print(definition.rstrip())
    def __init__(self,
    def _munge_whitespace(self, text):
    def _split(self, text):
    def _fix_sentence_endings(self, chunks):
    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
    def _wrap_chunks(self, chunks):
    def wrap(self, text):
    def fill(self, text):
def wrap(text, width=70, **kwargs):
def fill(text, width=70, **kwargs):
def dedent(text):

The above uses the textwrap module to demonstrate how it can handle multi-line definitions.

If you need to support Python 3 code with annotations, you'll need to be a little bit cleverer and track open and closing parens too; a colon within the parentheses doesn't count. On the other hand, Python 3 tokenize.tokenize() produces named tuples which make this a little easier to read:

import token
from tokenize import tokenize

def find_definitions(filename):
    """Yield every ``def`` header in *filename*, including multi-line ones.

    Python 3 version: tracks parenthesis depth so colons inside the
    argument list (annotations, lambdas, dict defaults) do not end the
    signature early.  Opens the file in binary mode so the tokenizer can
    detect the source encoding itself.
    """
    with open(filename, 'rb') as f:
        # tokenize() (unlike generate_tokens()) expects a bytes readline.
        gen = tokenize(f.readline)
        for tok in gen:
            if tok.type == token.NAME and tok.string == 'def':
                # function definition, read until next colon outside
                # parentheses.
                definition, last_line = [tok.line], tok.end[0]
                parens = 0
                while tok.exact_type != token.COLON or parens > 0:
                    if last_line != tok.end[0]:
                        # signature spilled onto a new line; collect it
                        definition.append(tok.line)
                        last_line = tok.end[0]
                    if tok.exact_type == token.LPAR:
                        parens += 1
                    elif tok.exact_type == token.RPAR:
                        parens -= 1
                    tok = next(gen)
                if last_line != tok.end[0]:
                    # the closing colon's line hasn't been collected yet
                    definition.append(tok.line)
                yield ''.join(definition)

Take into account that in Python 3 you'd preferably open source files in binary mode and let the tokenizer figure out the right encoding.