#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2023 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later

"""
A command line utility to check Blender's source code with CLANG's Python module.

To call this directly:

   export CLANG_LIB_DIR=/usr/lib64
   cd {BUILD_DIR}
   python ../blender/build_files/cmake/cmake_static_check_clang.py --match=".*" --checks=struct_comments
"""

import argparse
import os
import re
import sys

from typing import (
    Any,
    Dict,
    List,
    Type,
    Sequence,
    Tuple,
)

import project_source_info

CLANG_BIND_DIR = os.environ.get("CLANG_BIND_DIR")
CLANG_LIB_DIR = os.environ.get("CLANG_LIB_DIR")

if CLANG_BIND_DIR is None:
    print("$CLANG_BIND_DIR python binding dir not set")
if CLANG_LIB_DIR is None:
    print("$CLANG_LIB_DIR clang lib dir not set")

# NOTE: appending to `sys.path` must happen *before* `clang` is imported,
# otherwise the binding directory has no effect.
if CLANG_BIND_DIR:
    sys.path.append(CLANG_BIND_DIR)

# pylint: disable-next=import-outside-toplevel
import clang  # type: ignore
# pylint: disable-next=import-outside-toplevel
import clang.cindex  # type: ignore
from clang.cindex import (
    CursorKind,
)

if CLANG_LIB_DIR:
    clang.cindex.Config.set_library_path(CLANG_LIB_DIR)

# Only for readability.
ClangNode = Any
ClangTranslationUnit = Any
ClangSourceLocation = Any

USE_VERBOSE = os.environ.get("VERBOSE", None) is not None

CHECKER_IGNORE_PREFIX = [
    "extern",
]

CHECKER_EXCLUDE_SOURCE_FILES = set(os.path.join(*f.split("/")) for f in (
    # Skip parsing these large (mostly data) files.
    "source/blender/editors/space_text/text_format_pov.cc",
    "source/blender/editors/space_text/text_format_pov_ini.cc",
))


# -----------------------------------------------------------------------------
# Utility Functions

def clang_source_location_as_str(source_location: ClangSourceLocation) -> str:
    return "{:s}:{:d}:{:d}:".format(str(source_location.file), source_location.line, source_location.column)


# -----------------------------------------------------------------------------
# Checkers

class ClangChecker:
    """
    Base class for checkers.

    Notes:

    - The function ``check_source`` takes ``file_data`` as bytes instead of a string
      because the offsets provided by CLANG are byte offsets.
      While the offsets could be converted into UNICODE offsets,
      there doesn't seem to be an efficient & convenient way to do that.
    """
    __slots__ = ()

    def __new__(cls, *args: Tuple[Any], **kwargs: Dict[str, Any]) -> Any:
        raise RuntimeError("%s should not be instantiated" % cls)

    @staticmethod
    def check_source(
            _filepath: str,
            _file_data: bytes,
            _tu: ClangTranslationUnit,
            _shared_check_data: Any,
    ) -> List[str]:
        raise RuntimeError("This function must be overridden by its subclass!")

    @staticmethod
    def setup() -> Any:
        return None

    @staticmethod
    def teardown(_shared_check_data: Any) -> None:
        pass


class clang_checkers:
    # Fake module.

    class struct_comments(ClangChecker):
        """
        Ensure comments in struct declarations match the members of the struct, e.g:

           SomeStruct var = {
               /*name*/ "Text",
               /*children*/ nullptr,
               /*flag*/ 0,
           };

        Will generate a warning if any of the names in the prefix comments
        don't match the struct member names.
        """
        _struct_comments_ignore = {
            # `PyTypeObject` uses compile time members that vary (see the #PyVarObject_HEAD_INIT macro).
            # While some clever comment syntax could be supported to signify multiple/optional members,
            # this is such a specific case that it's simpler to skip this warning.
            "PyTypeObject": {"ob_base": {"ob_size"}},
        }

        @staticmethod
        def _struct_check_comments_recursive(
                # Static (unchanged for each recursion).
                filepath: str,
                file_data: bytes,
                # Different for each recursion.
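                # A brief note on the traversal arguments (matching how
                # `check_source` below invokes this): `node_parent` is the cursor
                # `node` was reached from (`None` for the translation-unit root),
                # `level` is the recursion depth, used only for verbose output.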
                node: ClangNode,
                node_parent: ClangNode,
                level: int,
                # Used to build data.
                struct_decl_map: Dict[str, ClangNode],
                struct_type_map: Dict[str, str],
                output: List[str],
        ) -> None:
            # Needed to read back the node.
            if USE_VERBOSE:
                print("TRY:", node.kind, node.spelling, len(list(node.get_tokens())), level, node.location)

            # if node.kind == CursorKind.VAR_DECL and node.spelling == "Vector_NumMethods":
            #     import IPython
            #     IPython.embed()

            if node.kind == CursorKind.STRUCT_DECL:
                # Ignore forward declarations.
                if next(node.get_children(), None) is not None:
                    struct_type = node.spelling.strip()
                    if not struct_type:
                        # The parent may be a `typedef [..] TypeID` where `[..]` is `struct { a; b; c; }`,
                        # so inspect the parent.
                        if node_parent is not None and (node_parent.kind == CursorKind.TYPEDEF_DECL):
                            tokens = list(node_parent.get_tokens())
                            if tokens[0].spelling == "typedef":
                                struct_type = tokens[-1].spelling
                    struct_decl_map[struct_type] = node

            # Ignore declarations for anything defined outside this file.
            if str(node.location.file) == filepath:
                if node.kind == CursorKind.INIT_LIST_EXPR:
                    if USE_VERBOSE:
                        print(node.spelling, node.location)
                    # Split to avoid `const struct` .. and similar.
                    # NOTE: there may be an array size suffix, e.g. `[4]`,
                    # this could be supported too.
                    struct_type = node.type.spelling.split()[-1]
                    struct = struct_decl_map.get(struct_type)
                    if struct is None:
                        if USE_VERBOSE:
                            print("NOT FOUND:", struct_type)
                        struct_type = struct_type_map.get(struct_type)
                        if struct_type is not None:
                            struct = struct_decl_map.get(struct_type)

                    if USE_VERBOSE:
                        print("INSPECTING STRUCT:", struct_type)
                    if struct is not None:
                        member_names = [
                            node_child.spelling for node_child in struct.get_children()
                            if node_child.kind == CursorKind.FIELD_DECL
                        ]
                        # if struct_type == "PyMappingMethods":
                        #     import IPython
                        #     IPython.embed()
                        children = list(node.get_children())
                        comment_names = []
                        # Set to true when there is a comment directly before a value,
                        # this is needed because:
                        # - Comments on the previous line are rarely intended to be
                        #   identifiers of the struct member.
                        # - Comments which _are_ intended to be identifiers can be
                        #   wrapped onto new-lines, so they should not be ignored.
                        #
                        # While it's possible every member is wrapped onto a new-line,
                        # this is highly unlikely.
                        comment_names_prefix_any = False
                        for node_child in children:
                            # Extract the content before the child
                            # (typically a C-style comment containing the struct member).
                            end = min(node_child.location.offset, len(file_data))
                            # It's possible this ID has a preceding "name::space::etc"
                            # which should be skipped.
                            while end > 0 and ((ch := bytes((file_data[end - 1],))).isalpha() or ch == b":"):
                                end -= 1
                            has_newline = False
                            while end > 0:
                                ch = bytes((file_data[end - 1],))
                                if ch in {b"\t", b" "}:
                                    end -= 1
                                elif ch == b"\n":
                                    end -= 1
                                    has_newline = True
                                else:
                                    break
                            beg = end - 1
                            while beg != 0 and bytes((file_data[beg],)) not in {
                                    b"\n",
                                    # Needed so declarations on a single line don't detect a comment
                                    # from an outer comment, e.g.
                                    #    SomeStruct x = {
                                    #        /*list*/ {nullptr, nullptr},
                                    #    };
                                    # Would start inside the first `nullptr`
                                    # and walk backwards to find `/*list*/`.
                                    b"{",
                            }:
                                beg -= 1

                            # Seek back until the comment end (in some cases this includes code).
                            # This occurs when the body of the declaration includes code, e.g.
                            #    rcti x = {
                            #        /*xmin*/ foo->bar.baz,
                            #        ... snip ...
                            #    };
                            # Where `"xmin*/ foo->bar."` would be extracted were it not for this check.
                            # There might be a more elegant way to handle this,
                            # for now snipping off the last comment characters is sufficient.
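                            # For example (illustrative input): if the scanned bytes are
                            # `/*xmin*/ foo->bar.` the `rfind` below locates the last `*/`
                            # and `end` is clamped to just past it, leaving `/*xmin*/`.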
                            end_test = file_data.rfind(b"*/", beg, end)
                            if end_test != -1:
                                end = end_test + 2

                            text = file_data[beg:end]
                            if text.lstrip().startswith(b"/*"):
                                if not has_newline:
                                    comment_names_prefix_any = True
                            else:
                                text = b""
                            comment_names.append(text.decode('utf-8'))

                        if USE_VERBOSE:
                            print(member_names)
                            print(comment_names)

                        total = min(len(member_names), len(comment_names))
                        if total != 0 and comment_names_prefix_any:
                            result = [""] * total
                            count_found = 0
                            count_invalid = 0
                            for i in range(total):
                                comment = comment_names[i]
                                if "/*" in comment and "*/" in comment:
                                    comment = comment.strip().strip("/").strip("*")
                                    if comment == member_names[i]:
                                        count_found += 1
                                    else:
                                        suppress_warning = False
                                        if (
                                                skip_members_table :=
                                                clang_checkers.struct_comments._struct_comments_ignore.get(
                                                    node_parent.type.spelling,
                                                )
                                        ) is not None:
                                            if (skip_members := skip_members_table.get(comment)) is not None:
                                                if member_names[i] in skip_members:
                                                    suppress_warning = True

                                        if not suppress_warning:
                                            result[i] = "Incorrect! found \"{:s}\" expected \"{:s}\"".format(
                                                comment, member_names[i],
                                            )
                                            count_invalid += 1
                                else:
                                    result[i] = "No comment for \"{:s}\"".format(member_names[i])

                            if count_found == 0 and count_invalid == 0:
                                # No comments used, skip this as not all declarations use this comment style.
                                output.append(
                                    "NONE: {:s} {:s}".format(
                                        clang_source_location_as_str(node.location),
                                        node.type.spelling,
                                    )
                                )
                            elif count_found != total:
                                for i in range(total):
                                    if result[i]:
                                        output.append(
                                            "FAIL: {:s} {:s}".format(
                                                clang_source_location_as_str(children[i].location),
                                                result[i],
                                            )
                                        )
                            else:
                                output.append(
                                    "OK: {:s} {:s}".format(
                                        clang_source_location_as_str(node.location),
                                        node.type.spelling,
                                    )
                                )

            for node_child in node.get_children():
                clang_checkers.struct_comments._struct_check_comments_recursive(
                    filepath, file_data,
                    node_child, node, level + 1,
                    struct_decl_map, struct_type_map, output,
                )

        @staticmethod
        def check_source(
                filepath: str,
                file_data: bytes,
                tu: ClangTranslationUnit,
                _shared_check_data: Any,
        ) -> List[str]:
            output: List[str] = []
            struct_decl_map: Dict[str, Any] = {}
            struct_type_map: Dict[str, str] = {}
            clang_checkers.struct_comments._struct_check_comments_recursive(
                filepath, file_data,
                tu.cursor, None, 0,
                struct_decl_map, struct_type_map, output,
            )
            return output


# -----------------------------------------------------------------------------
# Checker Class Access

def check_function_get_all() -> List[str]:
    checkers = []
    for name in dir(clang_checkers):
        value = getattr(clang_checkers, name)
        if isinstance(value, type) and issubclass(value, ClangChecker):
            checkers.append(name)
    checkers.sort()
    return checkers


def check_class_from_id(name: str) -> Type[ClangChecker]:
    result = getattr(clang_checkers, name)
    assert issubclass(result, ClangChecker)
    # MYPY 0.812 doesn't recognize the assert above.
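    # For example, `check_class_from_id("struct_comments")` returns the
    # `clang_checkers.struct_comments` class, unknown names raise an `AttributeError`.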
    return result  # type: ignore


def check_docstring_from_id(name: str) -> str:
    from textwrap import dedent
    result = getattr(clang_checkers, name).__doc__
    return dedent(result or '').strip('\n') + '\n'


# -----------------------------------------------------------------------------
# Generic Clang Checker

def check_source_file(
        filepath: str,
        args: Sequence[str],
        check_ids: Sequence[str],
        shared_check_data_foreach_check: Sequence[Any],
) -> str:
    index = clang.cindex.Index.create()
    try:
        tu = index.parse(filepath, args)
    except clang.cindex.TranslationUnitLoadError as ex:
        return "PARSE_ERROR: {:s} {!r}".format(filepath, ex)

    with open(filepath, "rb") as fh:
        file_data = fh.read()

    output: List[str] = []
    # Run each enabled checker over the parsed translation unit & raw file bytes.
    for check, shared_check_data in zip(check_ids, shared_check_data_foreach_check):
        cls = check_class_from_id(check)
        output.extend(cls.check_source(filepath, file_data, tu, shared_check_data))
    if not output:
        return ""
    return "\n".join(output)


def check_source_file_for_imap(args: Tuple[str, Sequence[str], Sequence[str], Sequence[Any]]) -> str:
    return check_source_file(*args)


def source_info_filter(
        source_info: List[Tuple[str, List[str], List[str]]],
        regex_list: Sequence[re.Pattern[str]],
) -> List[Tuple[str, List[str], List[str]]]:
    source_dir = project_source_info.SOURCE_DIR
    if not source_dir.endswith(os.sep):
        source_dir += os.sep
    source_info_result = []
    for item in source_info:
        filepath_source = item[0]
        if filepath_source.startswith(source_dir):
            filepath_source_relative = filepath_source[len(source_dir):]
            if filepath_source_relative in CHECKER_EXCLUDE_SOURCE_FILES:
                CHECKER_EXCLUDE_SOURCE_FILES.remove(filepath_source_relative)
                continue
            if filepath_source_relative.startswith("intern" + os.sep + "ghost"):
                pass
            elif filepath_source_relative.startswith("source" + os.sep):
                pass
            else:
                continue
            has_match = False
            for regex in regex_list:
                if regex.match(filepath_source_relative) is not None:
                    has_match = True
                    break
            if not has_match:
                continue
        else:
            # Skip files not in source (generated files from the build directory),
            # these could be checked but it's not all that useful (preview blends... etc).
            continue
        source_info_result.append(item)

    if CHECKER_EXCLUDE_SOURCE_FILES:
        sys.stderr.write(
            "Error: exclude file(s) are missing: {!r}\n".format(list(sorted(CHECKER_EXCLUDE_SOURCE_FILES)))
        )
        sys.exit(1)

    return source_info_result


def run_checks_on_project(
        check_ids: Sequence[str],
        regex_list: Sequence[re.Pattern[str]],
        jobs: int,
) -> None:
    source_info = project_source_info.build_info(ignore_prefix_list=CHECKER_IGNORE_PREFIX)
    source_defines = project_source_info.build_defines_as_args()

    # Apply exclusion.
    source_info = source_info_filter(source_info, regex_list)

    shared_check_data_foreach_check = [
        check_class_from_id(check).setup() for check in check_ids
    ]

    all_args = []
    index = 0  # Can be raised when debugging, to skip ahead.
    for filepath_source, inc_dirs, defs in source_info[index:]:
        args = (
            [("-I" + i) for i in inc_dirs] +
            [("-D" + d) for d in defs] +
            source_defines
        )
        all_args.append((filepath_source, args, check_ids, shared_check_data_foreach_check))

    import multiprocessing
    if jobs <= 0:
        jobs = multiprocessing.cpu_count()

    if jobs > 1:
        with multiprocessing.Pool(processes=jobs) as pool:
            # No `istarmap`, use an intermediate function.
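            # `imap` yields each file's result as soon as it's ready (in submission
            # order), so warnings print progressively instead of waiting for the
            # whole batch as `pool.map` would.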
            for result in pool.imap(check_source_file_for_imap, all_args):
                if result:
                    print(result)
    else:
        for (filepath_source, args, _check_ids, _shared_check_data_foreach_check) in all_args:
            result = check_source_file(filepath_source, args, check_ids, shared_check_data_foreach_check)
            if result:
                print(result)

    for (check, shared_check_data) in zip(check_ids, shared_check_data_foreach_check):
        check_class_from_id(check).teardown(shared_check_data)


def create_parser(checkers_all: Sequence[str]) -> argparse.ArgumentParser:
    from textwrap import indent

    # Create the doc-string for the checks.
    checks_all_docs = []
    for checker in checkers_all:
        # `%` -> `%%` is needed for `--help` not to interpret these as formatting arguments.
        checks_all_docs.append(
            "  %s\n%s" % (
                checker,
                indent(check_docstring_from_id(checker).replace("%", "%%"), '    '),
            )
        )

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument(
        "--match",
        nargs='+',
        required=True,
        metavar="REGEX",
        help="Match file paths against this expression",
    )
    parser.add_argument(
        "--checks",
        dest="checks",
        help=(
            "Specify the check presets to run.\n\n" +
            "\n".join(checks_all_docs) + "\n"
            "Multiple checkers may be passed at once (comma separated, no spaces)."
        ),
        required=True,
    )
    parser.add_argument(
        "--jobs",
        dest="jobs",
        type=int,
        default=0,
        help=(
            "The number of processes to use. "
            "Defaults to zero which detects the available cores, 1 is single threaded (useful for debugging)."
        ),
        required=False,
    )

    return parser


# -----------------------------------------------------------------------------
# Main Function

def main() -> int:
    checkers_all = check_function_get_all()
    parser = create_parser(checkers_all)
    args = parser.parse_args()

    regex_list = []
    for expr in args.match:
        try:
            regex_list.append(re.compile(expr))
        except Exception as ex:
            print("Error in expression: \"{:s}\"\n  {!r}".format(expr, ex))
            return 1

    run_checks_on_project(
        args.checks.split(','),
        regex_list,
        args.jobs,
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())