"""Directory-walking file collectors and simple text statistics (Chinese-text detection, keyword usage counts)."""
import os
import re
import threading

class StatisticalFunction:
    """Walk directory trees collecting source files and run simple text
    statistics over them: flag files whose non-comment text contains
    Chinese characters, and build usage histograms for keyword functions.
    """

    # Any CJK unified ideograph in the common Chinese range (U+4E00..U+9FA5).
    # Compiled once at class level instead of on every check call.
    _ZH_PATTERN = re.compile(u'[\u4e00-\u9fa5]+')

    def __init__(self):
        self.java_dir = []    # collected .java paths (filled by get_java_file)
        self.zh_file = []     # files whose non-comment text contains Chinese
        self.python_dir = []  # collected .py paths (filled by get_python_file)
        self.robot_dir = []   # collected .robot paths (filled by get_robot_file)

    def check_lang_every_word(self, file_path):
        """Append file_path to self.zh_file if its comment-stripped text
        contains at least one Chinese character.

        Block comments (/** ... **/ and /* ... */) and line comments
        (// ...) are removed before the check; a '//' preceded by ':' is
        preserved so URLs such as http://example.com survive stripping.
        """
        # errors='ignore' drops undecodable bytes at read time.  This
        # replaces the original read -> encode -> decode('utf-8', 'ignore')
        # round trip, which was a no-op: a successfully decoded str always
        # re-encodes cleanly, so bad bytes previously crashed the open().
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as text:
            content = text.read()

        content = re.sub(r"/\*\*.*?\*\*/", "", content)   # /** ... **/ blocks
        content = re.sub(r"/\*[\w\W]*?\*/", "", content)  # /* ... */ blocks
        # Line comments last, with a lookbehind so '://' (URLs) is kept.
        # (The original stripped '//.*' unconditionally *before* applying
        # this lookbehind version, which made the URL protection dead code.)
        content = re.sub(r"(?<!:)//.*", "", content)
        content = content.replace(" ", "").replace("\n", "")

        match = self._ZH_PATTERN.search(content)
        if match:
            print(match, '\n', file_path)
            self.zh_file.append(file_path)

    def _collect_files(self, path, suffix, dest):
        """Append to dest every file under path whose name ends with
        suffix, then return dest.

        os.walk already descends into every sub-directory, so no manual
        recursion is needed.  (The original re-walked directories whose
        names start with '__', which recorded their files twice.)
        """
        for root, _dirs, files in os.walk(path):
            for file_name in files:
                if file_name.endswith(suffix):
                    dest.append(os.path.join(root, file_name))
        return dest

    def get_java_file(self, path):
        """Collect every .java file under path; returns self.java_dir."""
        return self._collect_files(path, '.java', self.java_dir)

    def muti_threading(self):
        """Run check_lang_every_word over self.java_dir, one thread per
        file, and wait for all of them to finish.

        NOTE(review): spawns an unbounded number of threads (one per
        file); fine for small trees, consider a pool for very large ones.
        """
        threads = [
            threading.Thread(target=self.check_lang_every_word,
                             args=(file_zh,))
            for file_zh in self.java_dir
        ]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        print("end")

    def get_python_file(self, path):
        """Collect every .py file under path; returns self.python_dir."""
        return self._collect_files(path, '.py', self.python_dir)

    def get_robot_file(self, path):
        """Collect every .robot file under path; returns self.robot_dir."""
        return self._collect_files(path, '.robot', self.robot_dir)

    @staticmethod
    def get_class_function(class_ubrd):
        """Return the attribute names of class_ubrd that contain the
        substring 'kw_in_ubrd' (presumably the project's keyword-method
        naming marker -- TODO confirm)."""
        return [name for name in dir(class_ubrd) if 'kw_in_ubrd' in name]

    @staticmethod
    def statistical(ubrd_function, dir_robot_python):
        """Build a histogram mapping occurrence-count -> number of
        function names that occur that many times.

        For each name in ubrd_function, its substring occurrences are
        summed over every file in dir_robot_python, minus 1 -- presumably
        to discount the name's own definition; TODO confirm with callers.

        Raises OSError / UnicodeDecodeError if a listed file is missing
        or not valid UTF-8.
        """
        dict_function = {}
        for function in ubrd_function:
            count_num_all = 0
            for file_path in dir_robot_python:
                with open(file_path, 'r', encoding='utf-8') as text:
                    count_num_all += text.read().count(function)
            count_num_all -= 1
            dict_function[count_num_all] = dict_function.get(count_num_all, 0) + 1
        return dict_function
if __name__ == '__main__':
    # Intentional no-op: the module is meant to be imported and driven by
    # callers; there is no standalone command-line behavior.
    pass