Blender V2.61 - r43446

spell_check_source.py

Go to the documentation of this file.
00001 # ##### BEGIN GPL LICENSE BLOCK #####
00002 #
00003 #  This program is free software; you can redistribute it and/or
00004 #  modify it under the terms of the GNU General Public License
00005 #  as published by the Free Software Foundation; either version 2
00006 #  of the License, or (at your option) any later version.
00007 #
00008 #  This program is distributed in the hope that it will be useful,
00009 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
00010 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00011 #  GNU General Public License for more details.
00012 #
00013 #  You should have received a copy of the GNU General Public License
00014 #  along with this program; if not, write to the Free Software Foundation,
00015 #  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00016 #
00017 # ##### END GPL LICENSE BLOCK #####
00018 
00019 # <pep8 compliant>
00020 
00021 """
00022 Script for checking source code spelling.
00023 
00024    python3 spell_check_source.py some_source_file.py
00025 
00026 
00027 Currently only python source is checked.
00028 """
00029 
00030 import enchant
00031 dict_spelling = enchant.Dict("en_US")
00032 
00033 from spell_check_source_config import (dict_custom,
00034                                        dict_ignore,
00035                                        )
00036 
00037 
def words_from_text(text):
    """Extract words to treat as English for spell checking.

    Strips comment/quote delimiters from the ends of *text*, treats
    '/' and '-' as word separators, then filters out tokens that are
    not plausible English words (code fragments, option flags such as
    '--debug', and mixed-case identifiers such as 'StructRNA').

    Returns a list of candidate words (possibly empty).
    """
    text = text.strip("#'\"")
    text = text.replace("/", " ")
    text = text.replace("-", " ")

    # split on whitespace and trim surrounding punctuation from each token
    words = [w.strip("*?!:;.,'\"`") for w in text.split()]

    def word_ok(w):
        # empty after punctuation stripping
        if not w:
            return False

        # must contain at least one alphabetic character
        if not any(c.isalpha() for c in w):
            return False

        # prefix which renders this not a real word,
        # example '--debug', '\n'
        # TODO, add more
        if w[0] in "%-+\\":
            return False

        # code in comments, e.g. 'foo()', 'a[0]', 'x.y'
        if any(c in w for c in "<>{}[]():._0123456789"):
            return False

        # words which contain lower case but have upper case
        # chars past the first, eg - 'StructRNA': ignore these identifiers
        if len(w) > 1:
            has_lower = any(c.islower() for c in w)
            if has_lower and not w[1:].islower():
                return False

        return True

    return [w for w in words if word_ok(w)]
00092 
00093 
class Comment:
    """A single comment or docstring extracted from a source file."""

    # restrict instances to these attributes (many Comment objects
    # may be created per file, so avoid a per-instance __dict__)
    __slots__ = (
        "file",   # path of the source file the comment came from
        "text",   # raw comment/docstring text
        "line",   # line number where the comment starts
        "type",   # 'COMMENT' or 'DOCSTRING'
    )

    def __init__(self, file, text, line, type):
        self.file = file
        self.text = text
        self.line = line
        self.type = type

    def parse(self):
        """Return the spell-checkable words contained in this comment."""
        return words_from_text(self.text)
00109 
00110 
def extract_py_comments(filepath):
    """Extract comments and docstrings from a python source file.

    Returns a list of Comment instances: one of type 'COMMENT' per '#'
    comment, and one of type 'DOCSTRING' per string token that directly
    follows an INDENT token.  Comments starting with "#~" (a non
    standard hint for commented-out code) are skipped.
    """
    import token
    import tokenize

    comments = []
    prev_toktype = token.INDENT

    # 'with' guarantees the file handle is closed even if tokenization
    # raises (the original leaked the open file object)
    with open(filepath) as source:
        tokgen = tokenize.generate_tokens(source.readline)
        for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
            if toktype == token.STRING and prev_toktype == token.INDENT:
                comments.append(Comment(filepath, ttext, slineno, 'DOCSTRING'))
            elif toktype == tokenize.COMMENT:
                # non standard hint for commented CODE that we can ignore
                if not ttext.startswith("#~"):
                    comments.append(Comment(filepath, ttext, slineno, 'COMMENT'))
            prev_toktype = toktype
    return comments
00133 
00134 
def spell_check_py_comments(filepath):
    """Spell check all comments/docstrings in the given python file.

    Prints one line per misspelled word in the form:
        file:line: word, suggest (alternatives ...)
    Words listed (lowercased) in dict_custom or dict_ignore are skipped.
    """
    # BUGFIX: was extract_py_comments(sys.argv[1]), which silently
    # ignored the 'filepath' argument -- use the parameter so callers
    # can check any file, not just the one on the command line.
    comment_list = extract_py_comments(filepath)

    for comment in comment_list:
        for w in comment.parse():
            w_lower = w.lower()
            # words explicitly accepted or ignored by the config
            if w_lower in dict_custom or w_lower in dict_ignore:
                continue

            if not dict_spelling.check(w):
                print("%s:%d: %s, suggest (%s)" %
                      (comment.file,
                       comment.line,
                       w,
                       " ".join(dict_spelling.suggest(w)),
                       ))
00152 
import sys


def main():
    """Command line entry point: spell check the file given as argv[1]."""
    spell_check_py_comments(sys.argv[1])


if __name__ == "__main__":
    main()