import platform
import os
import sys
from re import findall
from time import sleep
EXIT_SUCCESS = 0
EXIT_FAILURE = 1
EOF = -1
def getTxt(filepath, index=0) -> str: # get .txt content
coding = ("utf-8", "gbk", "utf-16") # codings
if 0 <= index < len(coding): # in the range
try:
with open(filepath, "r", encoding=coding[index]) as f:
content = f.read()
return content[1:] if content.startswith("\ufeff") else content # if utf-8 with BOM, remove BOM
except (UnicodeError, UnicodeDecodeError):
return getTxt(filepath, index + 1) # recursion
except:
return None
else:
return None # out of range
def removeCommentLine(text) -> str: # remove comment lines
lines = text.split("\n")
for i, line in enumerate(lines):
for j in range(len(line)):
if line[j] == "%" and (0 == j or line[j - 1] != "\\"):
lines[i] = lines[i][:j]
return "\n".join(lines)
def clearScreen(fakeClear=120):
if sys.stdin.isatty(): # is at a console
if platform.system().lower() == "linux":
os.system("clear")
else:
try:
print("\n" * int(fakeClear))
except:
print("\n" * 120)
else:
try:
print("\n" * int(fakeClear))
except:
print("\n" * 120)
def press_any_key_to_continue():
input("Press any key to continue...")
def preExit(countdownTime=5) -> None:
try:
cntTime = int(countdownTime)
length = len(str(cntTime))
except:
return
print()
while cntTime > 0:
print("\rProgram ended, exiting in {{0:>{0}}}} second(s). ".format(length).format(cntTime), end="")
try:
sleep(1)
except:
print("\rProgram ended, exiting in {{0:>{0}}}} second(s). ".format(length).format(0))
return
cntTime -= 1
print("\rProgram ended, exiting in {{0:>{0}}}} second(s). ".format(length).format(cntTime))
def loadFolder(latex_folder) -> dict:
dicts = {}
for root, dirs, files in os.walk(latex_folder):
for f in files:
filepath = os.path.join(root, f)
if os.path.isfile(filepath):
if filepath.lower().endswith(".tex"):
dicts.setdefault("tex", [])
dicts["tex"].append(filepath)
elif filepath.lower().endswith(".bib"):
dicts.setdefault("bib", [])
dicts["bib"].append(filepath)
return dicts
def checkLabels(texFilepaths, isDebug=False) -> bool:
clearScreen()
if type(texFilepaths) not in (tuple, list) or not texFilepaths:
print("As no tex files are found, the checking cannot work. ")
print("Please press any key to go back. ")
press_any_key_to_continue()
return None
content = ""
for texFilepath in texFilepaths:
text = getTxt(texFilepath)
if text is None:
print("Read tex file \"{0}\" failed. ".format(texFilepath))
else:
content += removeCommentLine(text) + "\n"
labels = [item[item.index("{") + 1:-1] for item in findall("\\\\label\\{.+?\\}", content)]
refs = [item[item.index("{") + 1:-1] for item in findall("\\\\ref\\{.+?\\}", content)] + [item[item.index("{") + 1:-1] for item in findall("\\\\eqref\\{.+?\\}", content)]
for i in range(len(refs) - 1, -1, -1):
if "," in refs[i]:
refs += [item.strip() for item in refs[i].split(",")]
del refs[i]
if isDebug:
print("labels =", labels)
print("refs =", refs)
s = set()
repeated_label = set()
undefined_label = set()
unreferred_label = set()
for label in labels:
if label in s:
repeated_label.add(label)
else:
s.add(label)
if label not in refs:
unreferred_label.add(label)
for ref in refs:
if ref not in labels:
undefined_label.add(ref)
if len(s) == 1:
print("This is the label checking. There is 1 label in total. ")
elif len(s) > 1:
print("This is the label checking. There are {0} labels in total. ".format(len(s)))
else:
print("This is the label checking. There are no labels found. ")
print()
if len(repeated_label) == 1:
print("There is a repeated label found: \"{0}\". ".format(*repeated_label))
elif len(repeated_label) > 1:
print("There are {0} repeated labels found. The details are as follows. \n{1}".format(len(repeated_label), repeated_label))
else:
print("No repeated labels are found. ")
if len(undefined_label) == 1:
print("There is an undefined label found: \"{0}\". ".format(*undefined_label))
elif len(undefined_label) > 1:
print("There are {0} undefined labels found. The details are as follows. \n{1}".format(len(undefined_label), undefined_label))
else:
print("No undefined labels are found. ")
if len(unreferred_label) == 1:
print("There is an unreferred label found: \"{0}\". ".format(*unreferred_label))
elif len(unreferred_label) > 1:
print("There are {0} unreferred labels found. The details are as follows. \n{1}".format(len(unreferred_label), unreferred_label))
else:
print("No unreferred labels are found. ")
print()
if input("Would you like to check again (input \"Y\" and enter to check again): ").upper() == "Y":
return checkLabels(texFilepaths, isDebug=isDebug)
else:
return not any([repeated_label, undefined_label, unreferred_label])
def checkCitations(texFilepaths, isDebug=False) -> bool:
clearScreen()
if type(texFilepaths) not in (tuple, list) or not texFilepaths:
print("As no tex files are found, the checking cannot work. ")
print("Please press any key to go back. ")
press_any_key_to_continue()
return None
content = ""
for texFilepath in texFilepaths:
text = getTxt(texFilepath)
if text is None:
print("Read tex file \"{0}\" failed. ".format(texFilepath))
else:
content += removeCommentLine(text) + "\n"
cites = [item[item.index("{") + 1:-1] for item in findall("\\\\cite\\{.+?\\}", content)]
for i in range(len(cites) - 1, -1, -1):
if "," in cites[i]:
cites += [item.strip() for item in cites[i].split(",")]
del cites[i]
dicts = {}
repeated_entry = []
for line in content.split("\n"):
targets = findall("\\\\bibitem\\{.+?\\}", line)
if len(targets):
target = targets[0]
key = target[target.index("{") + 1:-1]
if key in dicts:
repeated_entry.append(key)
else:
dicts[key] = line[len(target):]
space_start = []
multiple_space = []
end_dot = []
repeated_content = []
undefined_entry = set()
uncited_entry = set()
for key in list(dicts.keys()):
if not dicts[key].startswith(" "):
space_start.append(key)
if dicts[key][:2] in (" ", " \t"): # do not use elif
multiple_space.append(key)
if not dicts[key].endswith(". "): # do not use elif
end_dot.append(key)
for key in list(dicts.keys()):
dicts[key] = dicts[key].strip()
reverse_dict = {}
for key in list(dicts.keys()):
reverse_dict.setdefault(dicts[key], [])
reverse_dict[dicts[key]].append(key)
for key in list(reverse_dict.keys()):
if len(reverse_dict[key]) > 2:
repeated_content.append(reverse_dict[key])
for key in list(dicts.keys()):
if key not in cites:
uncited_entry.add(key)
for cite in cites:
if cite not in dicts:
undefined_entry.add(cite)
if isDebug:
print("cites =", cites)
print("dicts =", dicts)
print("This is the citation checking. The result is as follows. ")
print()
if len(repeated_entry) == 1:
print("There is a repeated bibitem key: \"{0}\". ".format(*repeated_entry))
elif len(repeated_entry) > 1:
print("There are {0} repeated bibitem keys. The details are as follows. \n{1}".format(len(repeated_entry), repeated_entry))
else:
print("No repeated bibitem key is found. ")
if len(space_start) == 1:
print("There is a bibitem entry not starting with a space: \"{0}\". ".format(*space_start))
elif len(space_start) > 1:
print("There are {0} bibitem entries not starting with a space. The details are as follows. \n{1}".format(len(space_start), space_start))
else:
print("All bibitem entries start with a space. ")
if len(multiple_space) == 1:
print("There is a bibitem entry starting with multiple spaces: \"{0}\". ".format(*multiple_space))
elif len(multiple_space) > 1:
print("There are {0} bibitem entries starting with multiple spaces. The details are as follows. \n{1}".format(len(multiple_space), multiple_space))
else:
print("No bibitem entry starts with multiple spaces. ")
if len(end_dot) == 1:
print("There is a bibitem entry not ending with a dot: \"{0}\". ".format(*end_dot))
elif len(end_dot) > 1:
print("There are {0} bibitem entries not ending with a dot. The details are as follows. \n{1}".format(len(end_dot), end_dot))
else:
print("All bibitem entries end with a dot. ")
if len(repeated_content) == 1:
print("There are repeated bibitem entries. The details are as follows. \n{0}".format(repeated_content[0]))
elif len(repeated_content) > 1:
print("There are {0} groups of repeated bibitem entries. The details are as follows. \n{1}".format(len(repeated_content), repeated_content))
else:
print("No repeated bibitem entry is found. ")
if len(undefined_entry) == 1:
print("There is an undefined citation key: \"{0}\". ".format(*undefined_entry))
elif len(undefined_entry) > 1:
print("There are {0} undefined citation keys. The details are as follows. \n{1}".format(len(undefined_entry), undefined_entry))
else:
print("No undefined citation key is found. ")
if len(uncited_entry) == 1:
print("There is an uncited bibitem entry: \"{0}\". ".format(*uncited_entry))
elif len(uncited_entry) > 1:
print("There are {0} uncited bibitem entries. The details are as follows. \n{1}".format(len(uncited_entry), uncited_entry))
else:
print("No uncited bibitem entry is found. ")
print()
if input("Would you like to check again (input \"Y\" and enter to check again): ").upper() == "Y":
return checkCitations(texFilepaths, isDebug=isDebug)
else:
return not any([repeated_entry, space_start, multiple_space, end_dot, repeated_content, undefined_entry, uncited_entry])
def checkBibtex(bibFilepaths, isDebug=False) -> bool:
clearScreen()
if type(bibFilepaths) not in (tuple, list) or not bibFilepaths:
print("As no bib files are found, the checking cannot work. ")
print("Please press any key to go back. ")
press_any_key_to_continue()
return None
content = ""
for bibFilepath in bibFilepaths:
text = getTxt(bibFilepath)
if text is None:
print("Read bib file \"{0}\" failed. ".format(bibFilepath))
else:
content += removeCommentLine(text) + "\n"
bibitems = [item[item.index("{") + 1:item.index(",")] for item in findall("@.+?\\{.+?,", content)]
if isDebug:
print("bibitems =", bibitems)
s = set()
repeated_bibitem = set()
for bibitem in bibitems:
if bibitem in s:
repeated_bibitem.add(bibitem)
else:
s.add(bibitem)
if len(s) == 1:
print("This is the bibtex checking. There is 1 bibitem in total. ")
elif len(s) > 1:
print("This is the bibtex checking. There are {0} bibitems in total. ".format(len(s)))
else:
print("This is the bibtex checking. There are no bibitems found. ")
print()
if len(repeated_bibitem) == 1:
print("There is a repeated bibitem key: \"{0}\". ".format(*repeated_bibitem))
elif len(repeated_bibitem) > 1:
print("There are {0} repeated bibitem keys. The details are as follows. \n{1}".format(len(repeated_bibitem), repeated_bibitem))
else:
print("No repeated bibitem key is found. ")
print()
if input("Would you like to check again (input \"Y\" and enter to check again): ").upper() == "Y":
return checkBibtex(bibFilepaths, isDebug=isDebug)
else:
return not any([repeated_bibitem])
def citationSurvey(texFilepaths, isDebug=False) -> dict:
clearScreen()
if type(texFilepaths) not in (tuple, list) or not texFilepaths:
print("As no tex files are found, the checking cannot work. ")
print("Please press any key to go back. ")
press_any_key_to_continue()
return None
dicts = {}
for texFilepath in texFilepaths:
text = getTxt(texFilepath)
if text is None:
print("Read tex file \"{0}\" failed. ".format(texFilepath))
else:
lines = removeCommentLine(text).split("\n")
section = ""
for line in lines:
if len(findall("\\\\section\\{.+?\\}", line)):
section = findall("\\\\section\\{.+?\\}", line)[0][9:-1]
elif len(findall("\\\\section\\*\\{.+?\\}", line)):
section = findall("\\\\section\\*\\{.+?\\}", line)[0][10:-1]
elif len(findall("\\\\chapter\\{.+?\\}", line)):
section = findall("\\\\chapter\\{.+?\\}", line)[0][9:-1]
elif len(findall("\\\\chapter\\*\\{.+?\\}", line)):
section = findall("\\\\chapter\\*\\{.+?\\}", line)[0][10:-1]
if not section:
continue
for cite in findall("\\\\cite\\{.+?\\}", line):
cite = cite[cite.index("{") + 1:-1]
for subCite in cite.split(","):
dicts.setdefault(section, {})
dicts[section].setdefault(subCite.strip(), 0)
dicts[section][subCite.strip()] += 1
for section in list(dicts.keys()):
totalCount = 0
for subCite in list(dicts[section].keys()):
totalCount += dicts[section][subCite]
dicts[section] = {
"citation_count": len(dicts[section]),
"total_count": totalCount,
"details": dicts[section],
}
if isDebug:
print("dicts =", dicts)
print("This is the citation survey. The result is as follows. ")
print()
if len(dicts):
sections = sorted(list(dicts.keys()))
maxSectionLen = max([len(section) for section in sections])
for section in sections:
print(section.ljust(maxSectionLen + 2) + "contains {0} citations with {1} citation count(s). ".format(dicts[section]["citation_count"], dicts[section]["total_count"]))
else:
print("No citation is found. ")
print()
if input("Would you like to check again (input \"Y\" and enter to check again): ").upper() == "Y":
return citationSurvey(texFilepaths, isDebug=isDebug)
else:
return dicts
def mainBoard(texFilepaths, bibFilepaths, isDebug=False):
clearScreen()
if isDebug:
print("texFilepaths =", texFilepaths)
print("bibFilepaths =", bibFilepaths)
print("Main Board\n" + "-" * 10)
print("1. Reload files")
print("2. Check labels")
print("3. Check citations")
print("4. Check bibtex")
print("5. Citation survey")
print("0. Exit")
print()
print("Please input the corresponding number to select. ", end="")
print("The default option is \"0\". ", end="")
userInput = input().strip()
print()
if userInput == "1":
return
elif userInput == "2":
result = checkLabels(texFilepaths, isDebug=isDebug)
print("Check label result:", result)
elif userInput == "3":
result = checkCitations(texFilepaths, isDebug=isDebug)
print("Check citation result:", result)
elif userInput == "4":
result = checkBibtex(bibFilepaths, isDebug=isDebug)
print("Check bibtex result:", result)
elif userInput == "5":
result = citationSurvey(texFilepaths, isDebug=isDebug)
print("Citation survey result:", result)
else:
return
def main():
latex_folder = os.getcwd()
isDebug = True # Change this to False in production
while True:
clearScreen()
if isDebug:
print("latex_folder =", latex_folder)
print("Welcome to use the Label/Citation Checker!\n" + "-" * 10)
print("Please enter the folder path where the LaTeX files are located.")
userInput = input("Folder path (default is the current folder): ").strip()
if userInput:
latex_folder = userInput
else:
latex_folder = os.getcwd()
file_dict = loadFolder(latex_folder)
mainBoard(file_dict.get("tex", []), file_dict.get("bib", []), isDebug=isDebug)
if __name__ == "__main__":
main()
Reference:
https://github.com/BatchClayderman/checkCite