diff options
| author | Mauro Carvalho Chehab <mchehab+huawei@kernel.org> | 2025-04-08 18:09:11 +0800 |
|---|---|---|
| committer | Jonathan Corbet <corbet@lwn.net> | 2025-04-09 12:10:33 -0600 |
| commit | d966dc658ce381c56d85cd477e095944b8470379 (patch) | |
| tree | 70b0ba9f5462e93d76c5c0cd9e19c2e78e232dc2 | |
| parent | e31fd36da9c41f9f664e51a35860e9f606e81ef4 (diff) | |
scripts/kernel-doc.py: move KernelDoc class to a separate file
In preparation for letting the kerneldoc Sphinx extension import
Python libraries, move the KernelDoc class, together with the regular
expressions it uses, to a separate file.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Link: https://lore.kernel.org/r/c76df228504e711c6b4bcd23d5a0ea1fda678cda.1744106241.git.mchehab+huawei@kernel.org
| -rwxr-xr-x | scripts/kernel-doc.py | 1634 | ||||
| -rwxr-xr-x | scripts/lib/kdoc/kdoc_parser.py | 1690 |
2 files changed, 1692 insertions, 1632 deletions
diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index 7f00c8c86a78..f030a36a165b 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -117,53 +117,15 @@ SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) -from kdoc_re import Re, NestedMatch +from kdoc_parser import KernelDoc, type_param +from kdoc_re import Re - -# -# Regular expressions used to parse kernel-doc markups at KernelDoc class. -# -# Let's declare them in lowercase outside any class to make easier to -# convert from the python script. -# -# As those are evaluated at the beginning, no need to cache them -# - - -# Allow whitespace at end of comment start. -doc_start = Re(r'^/\*\*\s*$', cache=False) - -doc_end = Re(r'\*/', cache=False) -doc_com = Re(r'\s*\*\s*', cache=False) -doc_com_body = Re(r'\s*\* ?', cache=False) -doc_decl = doc_com + Re(r'(\w+)', cache=False) - -# @params and a strictly limited set of supported section names -# Specifically: -# Match @word: -# @...: -# @{section-name}: -# while trying to not match literal block starts like "example::" -# -doc_sect = doc_com + \ - Re(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', - flags=re.I, cache=False) - -doc_content = doc_com_body + Re(r'(.*)', cache=False) -doc_block = doc_com + Re(r'DOC:\s*(.*)?', cache=False) -doc_inline_start = Re(r'^\s*/\*\*\s*$', cache=False) -doc_inline_sect = Re(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) -doc_inline_end = Re(r'^\s*\*/\s*$', cache=False) -doc_inline_oneline = Re(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) function_pointer = Re(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) -attribute = Re(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", - flags=re.I | re.S, cache=False) # match expressions used to find embedded type information type_constant = Re(r"\b``([^\`]+)``\b", cache=False) type_constant2 = Re(r"\%([-_*\w]+)", cache=False) type_func = Re(r"(\w+)\(\)", 
cache=False) -type_param = Re(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) type_param_ref = Re(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) # Special RST handling for func ptr params @@ -181,1598 +143,6 @@ type_member = Re(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False) type_fallback = Re(r"\&([_\w]+)", cache=False) type_member_func = type_member + Re(r"\(\)", cache=False) -export_symbol = Re(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) -export_symbol_ns = Re(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) - -class KernelDoc: - # Parser states - STATE_NORMAL = 0 # normal code - STATE_NAME = 1 # looking for function name - STATE_BODY_MAYBE = 2 # body - or maybe more description - STATE_BODY = 3 # the body of the comment - STATE_BODY_WITH_BLANK_LINE = 4 # the body which has a blank line - STATE_PROTO = 5 # scanning prototype - STATE_DOCBLOCK = 6 # documentation block - STATE_INLINE = 7 # gathering doc outside main block - - st_name = [ - "NORMAL", - "NAME", - "BODY_MAYBE", - "BODY", - "BODY_WITH_BLANK_LINE", - "PROTO", - "DOCBLOCK", - "INLINE", - ] - - # Inline documentation state - STATE_INLINE_NA = 0 # not applicable ($state != STATE_INLINE) - STATE_INLINE_NAME = 1 # looking for member name (@foo:) - STATE_INLINE_TEXT = 2 # looking for member documentation - STATE_INLINE_END = 3 # done - STATE_INLINE_ERROR = 4 # error - Comment without header was found. - # Spit a warning as it's not - # proper kernel-doc and ignore the rest. 
- - st_inline_name = [ - "", - "_NAME", - "_TEXT", - "_END", - "_ERROR", - ] - - # Section names - - section_default = "Description" # default section - section_intro = "Introduction" - section_context = "Context" - section_return = "Return" - - undescribed = "-- undescribed --" - - def __init__(self, config, fname): - """Initialize internal variables""" - - self.fname = fname - self.config = config - - # Initial state for the state machines - self.state = self.STATE_NORMAL - self.inline_doc_state = self.STATE_INLINE_NA - - # Store entry currently being processed - self.entry = None - - # Place all potential outputs into an array - self.entries = [] - - def show_warnings(self, dtype, declaration_name): - # TODO: implement it - - return True - - # TODO: rename to emit_message - def emit_warning(self, ln, msg, warning=True): - """Emit a message""" - - if warning: - self.config.log.warning("%s:%d %s", self.fname, ln, msg) - else: - self.config.log.info("%s:%d %s", self.fname, ln, msg) - - def dump_section(self, start_new=True): - """ - Dumps section contents to arrays/hashes intended for that purpose. - """ - - name = self.entry.section - contents = self.entry.contents - - # TODO: we can prevent dumping empty sections here with: - # - # if self.entry.contents.strip("\n"): - # if start_new: - # self.entry.section = self.section_default - # self.entry.contents = "" - # - # return - # - # But, as we want to be producing the same output of the - # venerable kernel-doc Perl tool, let's just output everything, - # at least for now - - if type_param.match(name): - name = type_param.group(1) - - self.entry.parameterdescs[name] = contents - self.entry.parameterdesc_start_lines[name] = self.entry.new_start_line - - self.entry.sectcheck += name + " " - self.entry.new_start_line = 0 - - elif name == "@...": - name = "..." 
- self.entry.parameterdescs[name] = contents - self.entry.sectcheck += name + " " - self.entry.parameterdesc_start_lines[name] = self.entry.new_start_line - self.entry.new_start_line = 0 - - else: - if name in self.entry.sections and self.entry.sections[name] != "": - # Only warn on user-specified duplicate section names - if name != self.section_default: - self.emit_warning(self.entry.new_start_line, - f"duplicate section name '{name}'\n") - self.entry.sections[name] += contents - else: - self.entry.sections[name] = contents - self.entry.sectionlist.append(name) - self.entry.section_start_lines[name] = self.entry.new_start_line - self.entry.new_start_line = 0 - -# self.config.log.debug("Section: %s : %s", name, pformat(vars(self.entry))) - - if start_new: - self.entry.section = self.section_default - self.entry.contents = "" - - # TODO: rename it to store_declaration - def output_declaration(self, dtype, name, **args): - """ - Stores the entry into an entry array. - - The actual output and output filters will be handled elsewhere - """ - - # The implementation here is different than the original kernel-doc: - # instead of checking for output filters or actually output anything, - # it just stores the declaration content at self.entries, as the - # output will happen on a separate class. 
- # - # For now, we're keeping the same name of the function just to make - # easier to compare the source code of both scripts - - if "declaration_start_line" not in args: - args["declaration_start_line"] = self.entry.declaration_start_line - - args["type"] = dtype - - # TODO: use colletions.OrderedDict - - sections = args.get('sections', {}) - sectionlist = args.get('sectionlist', []) - - # Drop empty sections - # TODO: improve it to emit warnings - for section in [ "Description", "Return" ]: - if section in sectionlist: - if not sections[section].rstrip(): - del sections[section] - sectionlist.remove(section) - - self.entries.append((name, args)) - - self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) - - def reset_state(self, ln): - """ - Ancillary routine to create a new entry. It initializes all - variables used by the state machine. - """ - - self.entry = argparse.Namespace - - self.entry.contents = "" - self.entry.function = "" - self.entry.sectcheck = "" - self.entry.struct_actual = "" - self.entry.prototype = "" - - self.entry.parameterlist = [] - self.entry.parameterdescs = {} - self.entry.parametertypes = {} - self.entry.parameterdesc_start_lines = {} - - self.entry.section_start_lines = {} - self.entry.sectionlist = [] - self.entry.sections = {} - - self.entry.anon_struct_union = False - - self.entry.leading_space = None - - # State flags - self.state = self.STATE_NORMAL - self.inline_doc_state = self.STATE_INLINE_NA - self.entry.brcount = 0 - - self.entry.in_doc_sect = False - self.entry.declaration_start_line = ln - - def push_parameter(self, ln, decl_type, param, dtype, - org_arg, declaration_name): - if self.entry.anon_struct_union and dtype == "" and param == "}": - return # Ignore the ending }; from anonymous struct/union - - self.entry.anon_struct_union = False - - param = Re(r'[\[\)].*').sub('', param, count=1) - - if dtype == "" and param.endswith("..."): - if Re(r'\w\.\.\.$').search(param): - # For named variable 
parameters of the form `x...`, - # remove the dots - param = param[:-3] - else: - # Handles unnamed variable parameters - param = "..." - - if param not in self.entry.parameterdescs or \ - not self.entry.parameterdescs[param]: - - self.entry.parameterdescs[param] = "variable arguments" - - elif dtype == "" and (not param or param == "void"): - param = "void" - self.entry.parameterdescs[param] = "no arguments" - - elif dtype == "" and param in ["struct", "union"]: - # Handle unnamed (anonymous) union or struct - dtype = param - param = "{unnamed_" + param + "}" - self.entry.parameterdescs[param] = "anonymous\n" - self.entry.anon_struct_union = True - - # Handle cache group enforcing variables: they do not need - # to be described in header files - elif "__cacheline_group" in param: - # Ignore __cacheline_group_begin and __cacheline_group_end - return - - # Warn if parameter has no description - # (but ignore ones starting with # as these are not parameters - # but inline preprocessor statements) - if param not in self.entry.parameterdescs and not param.startswith("#"): - self.entry.parameterdescs[param] = self.undescribed - - if self.show_warnings(dtype, declaration_name) and "." not in param: - if decl_type == 'function': - dname = f"{decl_type} parameter" - else: - dname = f"{decl_type} member" - - self.emit_warning(ln, - f"{dname} '{param}' not described in '{declaration_name}'") - - # Strip spaces from param so that it is one continuous string on - # parameterlist. This fixes a problem where check_sections() - # cannot find a parameter like "addr[6 + 2]" because it actually - # appears as "addr[6", "+", "2]" on the parameter list. - # However, it's better to maintain the param string unchanged for - # output, so just weaken the string compare in check_sections() - # to ignore "[blah" in a parameter string. 
- - self.entry.parameterlist.append(param) - org_arg = Re(r'\s\s+').sub(' ', org_arg) - self.entry.parametertypes[param] = org_arg - - def save_struct_actual(self, actual): - """ - Strip all spaces from the actual param so that it looks like - one string item. - """ - - actual = Re(r'\s*').sub("", actual, count=1) - - self.entry.struct_actual += actual + " " - - def create_parameter_list(self, ln, decl_type, args, splitter, declaration_name): - - # temporarily replace all commas inside function pointer definition - arg_expr = Re(r'(\([^\),]+),') - while arg_expr.search(args): - args = arg_expr.sub(r"\1#", args) - - for arg in args.split(splitter): - # Strip comments - arg = Re(r'\/\*.*\*\/').sub('', arg) - - # Ignore argument attributes - arg = Re(r'\sPOS0?\s').sub(' ', arg) - - # Strip leading/trailing spaces - arg = arg.strip() - arg = Re(r'\s+').sub(' ', arg, count=1) - - if arg.startswith('#'): - # Treat preprocessor directive as a typeless variable just to fill - # corresponding data structures "correctly". Catch it later in - # output_* subs. 
- - # Treat preprocessor directive as a typeless variable - self.push_parameter(ln, decl_type, arg, "", - "", declaration_name) - - elif Re(r'\(.+\)\s*\(').search(arg): - # Pointer-to-function - - arg = arg.replace('#', ',') - - r = Re(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') - if r.match(arg): - param = r.group(1) - else: - self.emit_warning(ln, f"Invalid param: {arg}") - param = arg - - dtype = Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) - self.save_struct_actual(param) - self.push_parameter(ln, decl_type, param, dtype, - arg, declaration_name) - - elif Re(r'\(.+\)\s*\[').search(arg): - # Array-of-pointers - - arg = arg.replace('#', ',') - r = Re(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') - if r.match(arg): - param = r.group(1) - else: - self.emit_warning(ln, f"Invalid param: {arg}") - param = arg - - dtype = Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) - - self.save_struct_actual(param) - self.push_parameter(ln, decl_type, param, dtype, - arg, declaration_name) - - elif arg: - arg = Re(r'\s*:\s*').sub(":", arg) - arg = Re(r'\s*\[').sub('[', arg) - - args = Re(r'\s*,\s*').split(arg) - if args[0] and '*' in args[0]: - args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) - - first_arg = [] - r = Re(r'^(.*\s+)(.*?\[.*\].*)$') - if args[0] and r.match(args[0]): - args.pop(0) - first_arg.extend(r.group(1)) - first_arg.append(r.group(2)) - else: - first_arg = Re(r'\s+').split(args.pop(0)) - - args.insert(0, first_arg.pop()) - dtype = ' '.join(first_arg) - - for param in args: - if Re(r'^(\*+)\s*(.*)').match(param): - r = Re(r'^(\*+)\s*(.*)') - if not r.match(param): - self.emit_warning(ln, f"Invalid param: {param}") - continue - - param = r.group(1) - - self.save_struct_actual(r.group(2)) - self.push_parameter(ln, decl_type, r.group(2), - f"{dtype} {r.group(1)}", - arg, declaration_name) - - elif Re(r'(.*?):(\w+)').search(param): - r = Re(r'(.*?):(\w+)') - if not r.match(param): - self.emit_warning(ln, f"Invalid param: {param}") 
- continue - - if dtype != "": # Skip unnamed bit-fields - self.save_struct_actual(r.group(1)) - self.push_parameter(ln, decl_type, r.group(1), - f"{dtype}:{r.group(2)}", - arg, declaration_name) - else: - self.save_struct_actual(param) - self.push_parameter(ln, decl_type, param, dtype, - arg, declaration_name) - - def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck): - sects = sectcheck.split() - prms = prmscheck.split() - err = False - - for sx in range(len(sects)): # pylint: disable=C0200 - err = True - for px in range(len(prms)): # pylint: disable=C0200 - prm_clean = prms[px] - prm_clean = Re(r'\[.*\]').sub('', prm_clean) - prm_clean = attribute.sub('', prm_clean) - - # ignore array size in a parameter string; - # however, the original param string may contain - # spaces, e.g.: addr[6 + 2] - # and this appears in @prms as "addr[6" since the - # parameter list is split at spaces; - # hence just ignore "[..." for the sections check; - prm_clean = Re(r'\[.*').sub('', prm_clean) - - if prm_clean == sects[sx]: - err = False - break - - if err: - if decl_type == 'function': - dname = f"{decl_type} parameter" - else: - dname = f"{decl_type} member" - - self.emit_warning(ln, - f"Excess {dname} '{sects[sx]}' description in '{decl_name}'") - - def check_return_section(self, ln, declaration_name, return_type): - - if not self.config.wreturn: - return - - # Ignore an empty return type (It's a macro) - # Ignore functions with a "void" return type (but not "void *") - if not return_type or Re(r'void\s*\w*\s*$').search(return_type): - return - - if not self.entry.sections.get("Return", None): - self.emit_warning(ln, - f"No description found for return value of '{declaration_name}'") - - def dump_struct(self, ln, proto): - """ - Store an entry for an struct or union - """ - - type_pattern = r'(struct|union)' - - qualifiers = [ - "__attribute__", - "__packed", - "__aligned", - "____cacheline_aligned_in_smp", - "____cacheline_aligned", - ] - - definition_body 
= r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" - struct_members = Re(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') - - # Extract struct/union definition - members = None - declaration_name = None - decl_type = None - - r = Re(type_pattern + r'\s+(\w+)\s*' + definition_body) - if r.search(proto): - decl_type = r.group(1) - declaration_name = r.group(2) - members = r.group(3) - else: - r = Re(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') - - if r.search(proto): - decl_type = r.group(1) - declaration_name = r.group(3) - members = r.group(2) - - if not members: - self.emit_warning(ln, f"{proto} error: Cannot parse struct or union!") - self.config.errors += 1 - return - - if self.entry.identifier != declaration_name: - self.emit_warning(ln, - f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") - return - - args_pattern =r'([^,)]+)' - - sub_prefixes = [ - (Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), - (Re(r'\/\*\s*private:.*', re.S| re.I), ''), - - # Strip comments - (Re(r'\/\*.*?\*\/', re.S), ''), - - # Strip attributes - (attribute, ' '), - (Re(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__packed\s*', re.S), ' '), - (Re(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (Re(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (Re(r'\s*____cacheline_aligned', re.S), ' '), - - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. 
- # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. - - (Re(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (Re(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (Re(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (Re(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - - # Replace macros - # - # TODO: it is better to also move those to the NestedMatch logic, - # to ensure that parenthesis will be properly matched. 
- - (Re(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (Re(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (Re(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (Re(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (Re(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (Re(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (Re(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), - (Re(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (Re(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), - ] - - # Regexes here are guaranteed to have the end limiter matching - # the start delimiter. Yet, right now, only one replace group - # is allowed. - - sub_nested_prefixes = [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), - ] - - for search, sub in sub_prefixes: - members = search.sub(sub, members) - - nested = NestedMatch() - - for search, sub in sub_nested_prefixes: - members = nested.sub(search, sub, members) - - # Keeps the original declaration as-is - declaration = members - - # Split nested struct/union elements - # - # This loop was simpler at the original kernel-doc perl version, as - # while ($members =~ m/$struct_members/) { ... } - # reads 'members' string on each interaction. - # - # Python behavior is different: it parses 'members' only once, - # creating a list of tuples from the first interaction. - # - # On other words, this won't get nested structs. - # - # So, we need to have an extra loop on Python to override such - # re limitation. 
- - while True: - tuples = struct_members.findall(members) - if not tuples: - break - - for t in tuples: - newmember = "" - maintype = t[0] - s_ids = t[5] - content = t[3] - - oldmember = "".join(t) - - for s_id in s_ids.split(','): - s_id = s_id.strip() - - newmember += f"{maintype} {s_id}; " - s_id = Re(r'[:\[].*').sub('', s_id) - s_id = Re(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) - - for arg in content.split(';'): - arg = arg.strip() - - if not arg: - continue - - r = Re(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') - if r.match(arg): - # Pointer-to-function - dtype = r.group(1) - name = r.group(2) - extra = r.group(3) - - if not name: - continue - - if not s_id: - # Anonymous struct/union - newmember += f"{dtype}{name}{extra}; " - else: - newmember += f"{dtype}{s_id}.{name}{extra}; " - - else: - arg = arg.strip() - # Handle bitmaps - arg = Re(r':\s*\d+\s*').sub('', arg) - - # Handle arrays - arg = Re(r'\[.*\]').sub('', arg) - - # Handle multiple IDs - arg = Re(r'\s*,\s*').sub(',', arg) - - - r = Re(r'(.*)\s+([\S+,]+)') - - if r.search(arg): - dtype = r.group(1) - names = r.group(2) - else: - newmember += f"{arg}; " - continue - - for name in names.split(','): - name = Re(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() - - if not name: - continue - - if not s_id: - # Anonymous struct/union - newmember += f"{dtype} {name}; " - else: - newmember += f"{dtype} {s_id}.{name}; " - - members = members.replace(oldmember, newmember) - - # Ignore other nested elements, like enums - members = re.sub(r'(\{[^\{\}]*\})', '', members) - - self.create_parameter_list(ln, decl_type, members, ';', - declaration_name) - self.check_sections(ln, declaration_name, decl_type, - self.entry.sectcheck, self.entry.struct_actual) - - # Adjust declaration for better display - declaration = Re(r'([\{;])').sub(r'\1\n', declaration) - declaration = Re(r'\}\s+;').sub('};', declaration) - - # Better handle inlined enums - while True: - r = Re(r'(enum\s+\{[^\}]+),([^\n])') - if not r.search(declaration): - 
break - - declaration = r.sub(r'\1,\n\2', declaration) - - def_args = declaration.split('\n') - level = 1 - declaration = "" - for clause in def_args: - - clause = clause.strip() - clause = Re(r'\s+').sub(' ', clause, count=1) - - if not clause: - continue - - if '}' in clause and level > 1: - level -= 1 - - if not Re(r'^\s*#').match(clause): - declaration += "\t" * level - - declaration += "\t" + clause + "\n" - if "{" in clause and "}" not in clause: - level += 1 - - self.output_declaration(decl_type, declaration_name, - struct=declaration_name, - module=self.entry.modulename, - definition=declaration, - parameterlist=self.entry.parameterlist, - parameterdescs=self.entry.parameterdescs, - parametertypes=self.entry.parametertypes, - sectionlist=self.entry.sectionlist, - sections=self.entry.sections, - purpose=self.entry.declaration_purpose) - - def dump_enum(self, ln, proto): - - # Ignore members marked private - proto = Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) - proto = Re(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) - - # Strip comments - proto = Re(r'\/\*.*?\*\/', flags=re.S).sub('', proto) - - # Strip #define macros inside enums - proto = Re(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) - - members = None - declaration_name = None - - r = Re(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') - if r.search(proto): - declaration_name = r.group(2) - members = r.group(1).rstrip() - else: - r = Re(r'enum\s+(\w*)\s*\{(.*)\}') - if r.match(proto): - declaration_name = r.group(1) - members = r.group(2).rstrip() - - if not members: - self.emit_warning(ln, f"{proto}: error: Cannot parse enum!") - self.config.errors += 1 - return - - if self.entry.identifier != declaration_name: - if self.entry.identifier == "": - self.emit_warning(ln, - f"{proto}: wrong kernel-doc identifier on prototype") - else: - self.emit_warning(ln, - f"expecting prototype for enum {self.entry.identifier}. 
Prototype was for enum {declaration_name} instead") - return - - if not declaration_name: - declaration_name = "(anonymous)" - - member_set = set() - - members = Re(r'\([^;]*?[\)]').sub('', members) - - for arg in members.split(','): - if not arg: - continue - arg = Re(r'^\s*(\w+).*').sub(r'\1', arg) - self.entry.parameterlist.append(arg) - if arg not in self.entry.parameterdescs: - self.entry.parameterdescs[arg] = self.undescribed - if self.show_warnings("enum", declaration_name): - self.emit_warning(ln, - f"Enum value '{arg}' not described in enum '{declaration_name}'") - member_set.add(arg) - - for k in self.entry.parameterdescs: - if k not in member_set: - if self.show_warnings("enum", declaration_name): - self.emit_warning(ln, - f"Excess enum value '%{k}' description in '{declaration_name}'") - - self.output_declaration('enum', declaration_name, - enum=declaration_name, - module=self.config.modulename, - parameterlist=self.entry.parameterlist, - parameterdescs=self.entry.parameterdescs, - sectionlist=self.entry.sectionlist, - sections=self.entry.sections, - purpose=self.entry.declaration_purpose) - - def dump_declaration(self, ln, prototype): - if self.entry.decl_type == "enum": - self.dump_enum(ln, prototype) - return - - if self.entry.decl_type == "typedef": - self.dump_typedef(ln, prototype) - return - - if self.entry.decl_type in ["union", "struct"]: - self.dump_struct(ln, prototype) - return - - # TODO: handle other types - self.output_declaration(self.entry.decl_type, prototype, - entry=self.entry) - - def dump_function(self, ln, prototype): - - func_macro = False - return_type = '' - decl_type = 'function' - - # Prefixes that would be removed - sub_prefixes = [ - (r"^static +", "", 0), - (r"^extern +", "", 0), - (r"^asmlinkage +", "", 0), - (r"^inline +", "", 0), - (r"^__inline__ +", "", 0), - (r"^__inline +", "", 0), - (r"^__always_inline +", "", 0), - (r"^noinline +", "", 0), - (r"^__FORTIFY_INLINE +", "", 0), - (r"__init +", "", 0), - 
(r"__init_or_module +", "", 0), - (r"__deprecated +", "", 0), - (r"__flatten +", "", 0), - (r"__meminit +", "", 0), - (r"__must_check +", "", 0), - (r"__weak +", "", 0), - (r"__sched +", "", 0), - (r"_noprof", "", 0), - (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), - (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), - (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), - (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), - (r"__attribute_const__ +", "", 0), - - # It seems that Python support for re.X is broken: - # At least for me (Python 3.13), this didn't work -# (r""" -# __attribute__\s*\(\( -# (?: -# [\w\s]+ # attribute name -# (?:\([^)]*\))? # attribute arguments -# \s*,? # optional comma at the end -# )+ -# \)\)\s+ -# """, "", re.X), - - # So, remove whitespaces and comments from it - (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), - ] - - for search, sub, flags in sub_prefixes: - prototype = Re(search, flags).sub(sub, prototype) - - # Macros are a special case, as they change the prototype format - new_proto = Re(r"^#\s*define\s+").sub("", prototype) - if new_proto != prototype: - is_define_proto = True - prototype = new_proto - else: - is_define_proto = False - - # Yes, this truly is vile. We are looking for: - # 1. Return type (may be nothing if we're looking at a macro) - # 2 |
