#!/usr/bin/python3
"""Report the guessed natural language of each page of plain-text files.

Pages are delimited by the Form Feed character ("\\f").  For every input
file one line per page is printed in the form::

    <language>   <page number>   <file name>

When no file names are given on the command line, the whole of standard
input is treated as a single text and one line ``<language>   stdin`` is
printed instead.
"""

from guess_language import guess_language
from argparse import ArgumentParser, FileType
import sys

__authors__ = ["GG"]
__date__ = 20191031
__description__ = 'A simple pdf example'


def guesslang(fnm):
    """Print the guessed language of every form-feed-delimited page of a file.

    The file is opened in text mode with the platform default encoding and
    read completely into memory.

    Args:
        fnm: Path of the text file to analyse.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the file even if read() raises.
    with open(fnm, 'rt') as fin:
        text = fin.read()

    pages = text.split("\f")

    # Note: there should not be anything behind the last Form Feed character,
    # so the final chunk is handled separately and only reported when a
    # language was actually detected in it.  str.split() always returns at
    # least one element, so pop() cannot fail here.
    lastpage = pages.pop()
    lastlang = guess_language(lastpage)

    for page_num, page in enumerate(pages, start=1):
        lang = guess_language(page)
        print(lang, ' ', page_num, ' ', fnm)

    if lastlang != 'UNKNOWN':
        # The trailing chunk directly follows the last regular page, so its
        # number is len(pages) + 1 (not one further, which would skip a page
        # number in the output).
        print(lastlang, ' ', len(pages) + 1, ' ', fnm)


def main(argv=None):
    """Run the command-line interface.

    Args:
        argv: Full argument vector including the program name at index 0.
            Defaults to ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv

    filenames = argv[1:]
    if filenames:
        for fnm in filenames:
            guesslang(fnm)
    else:
        # No files given: classify everything read from standard input as
        # one text, without splitting it into pages.
        text = sys.stdin.read()
        lang = guess_language(text)
        print(lang, ' ', 'stdin')


# Guarded so that importing this module does not read sys.argv or block on
# stdin; running it as a script behaves as before.
if __name__ == "__main__":
    main()