'''Script pendaftar-artikel-otomatis (dari dan untuk orang primitif).
Script ini menjelajahi secara rekursif direktori situs, mendaftar semua artikel
(berekstensi .html saja).
* Menganggap tag H1 (heading terbesar) pertama sebagai tag judul.
* Ada kemampuan mengekstrak ringkasan artikel, tapi karena saya belum
menemukan tag yang unik sekaligus valid, tidak saya gunakan.
* Menangani nesting-directory dengan rekursi.
Author = Adhi Hargo
Last modified = 15/01/2007 3:49:42
'''
import sys,os,re
from _skel import *
# Alias for standard output; not referenced in the visible part of this file.
out = sys.stdout
# Number of bytes gettag() reads from the start of each file when looking
# for a tag; anything beyond this offset is never parsed.
READSIZE = 4096
from sgmllib import SGMLParser
class TagParser(SGMLParser):
    '''Parser that captures the text of the first non-empty ``tag`` element.

    After feeding markup, the captured text is available as ``self.data``
    (an empty string when nothing matched).

    Parameters:
        tag: name of the element whose text should be captured.
        *attrtuples: optional attribute filter.  Each argument is either a
            single ``(name, value)`` pair or a sequence of such pairs (the
            shape produced by ``TagParser(tag, attrs)`` when ``attrs`` is a
            ``*args`` tuple).  An element is accepted only when every
            requested pair occurs in its attribute list.  With no filter,
            or an empty one, any element named ``tag`` is accepted.
    '''

    def __init__(self, tag, *attrtuples):
        SGMLParser.__init__(self)
        self.tag = tag
        # Flatten the accepted argument shapes into one tuple of
        # (name, value) pairs.  Indexing attrtuples[0] directly would fail
        # when no filter is given, and a nested tuple of pairs could never
        # be found inside an element's attribute list.
        wanted = []
        for item in attrtuples:
            if not item:
                continue
            if isinstance(item[0], (tuple, list)):
                wanted.extend(tuple(pair) for pair in item)
            else:
                wanted.append(tuple(item))
        self.attrs = tuple(wanted)

    def reset(self):
        '''Reset the base parser and forget any captured text.'''
        SGMLParser.reset(self)
        self.in_tag = False
        self.data = ''

    def unknown_starttag(self, tag, attrs):
        '''Start capturing when a matching element opens.

        ``attrs`` is assumed to be the list of (name, value) pairs supplied
        by SGMLParser.  Once some text has been captured, later elements of
        the same name are ignored.
        '''
        if tag != self.tag or self.data:
            return
        for pair in self.attrs:
            if pair not in attrs:
                return
        self.in_tag = True

    def unknown_endtag(self, tag):
        '''Stop capturing when the matching element closes.'''
        if tag == self.tag and self.in_tag:
            self.in_tag = False

    def handle_data(self, data):
        '''Append text found inside the captured element.

        The text is accumulated rather than overwritten because the parser
        may deliver one element's text in several pieces (for example
        around entity references or nested inline tags).
        '''
        if self.in_tag:
            self.data += data
def gettag(filename, tag, *attrs):
    '''Return the text of the first matching ``tag`` element in a file.

    Only the first READSIZE bytes of ``filename`` are parsed.

    Parameters:
        filename: path of the file to scan.
        tag: element name to look for.
        *attrs: optional attribute filter, handed to TagParser as one tuple.

    Returns:
        The text collected by TagParser, or '' when nothing matched.

    Raises:
        IOError/OSError if the file cannot be opened or read; any error
        raised by the parser propagates as well.
    '''
    tf = TagParser(tag, attrs)
    tf.reset()
    hf = open(filename, 'rb')
    try:
        tf.feed(hf.read(READSIZE))
    finally:
        # Close the file even when reading or parsing fails.
        hf.close()
    tf.close()
    return tf.data
# Matches file names ending in ".html" that have at least four characters
# before the extension, the first of which is not a dot.  A name such as
# "index.isi.html" is therefore rejected.  Raw string: the backslashes are
# regex escapes, not string escapes.
isfinal = re.compile(r"[^\.]...\.html$")
# Name of the generated listing file, written into the article root.
DSTFILENAME = "index.isi"
# Sub-directory (relative to the site directory) that is scanned.
ARTICLEPATH = "nonkode"
def recurse_dir(dirname, level=0):
    '''Recursively collect the article files below ``dirname``.

    Parameters:
        dirname: directory to scan.
        level: recursion depth; 0 is the starting directory, whose own
            files are never listed (only its sub-directories are followed).

    Returns:
        A list holding first the matching file names (those accepted by
        ``isfinal``) and then, for every sub-directory, a one-entry dict
        ``{name: tuple_of_the_same_structure}``.  Entries are sorted by
        name so the result does not depend on the arbitrary order in which
        os.listdir() reports them.
    '''
    listing = []
    subdirs = []
    for name in sorted(os.listdir(dirname)):
        full = os.path.join(dirname, name)
        if os.path.isdir(full):
            subdirs.append(name)
        elif (level > 0 and os.path.isfile(full)
              and isfinal.search(name) is not None):
            listing.append(name)
    for name in subdirs:
        children = recurse_dir(os.path.join(dirname, name), level + 1)
        listing.append({name: tuple(children)})
    return listing
def stringize(dirname, fnames, root, prefix=''):
    '''Render a directory listing as a list of HTML fragments.

    Parameters:
        dirname: name of the directory being rendered.
        fnames: sequence of file names and one-entry dicts
            ``{subdir: entries}``, as produced by recurse_dir().
        root: path of the directory that contains ``dirname``.  When
            ``dirname == root`` the call is treated as the top level and
            no enclosing <ul>/<strong> heading is emitted.
        prefix: path of the enclosing directories relative to the top
            level; used internally by the recursion so that links to
            deeply nested files keep their full relative path.

    Returns:
        A list of strings; empty when ``fnames`` is empty.
    '''
    strlist = []
    if not fnames:
        return strlist

    nested = dirname != root
    here = os.path.join(root, dirname)
    rel_dir = os.path.join(prefix, dirname)
    # The top-level directory itself must not become part of the links.
    child_prefix = rel_dir if nested else prefix

    if nested:
        strlist.append('<ul id="hierarchy">')
        strlist.append('<strong>%s</strong>' % dirname.title())

    for entry in fnames:
        if isinstance(entry, dict):
            for subdir in entry:
                strlist.extend(
                    stringize(subdir, entry[subdir], here, child_prefix))
            continue
        try:
            title = gettag(os.path.join(here, entry), 'h1')
        except Exception:
            # Best effort: an unreadable or unparsable file is still
            # listed, just without a link.
            strlist.append('<li>%s</li>' % entry)
        else:
            href = os.path.join(rel_dir, entry).replace(os.sep, '/')
            # Fall back to the file name so the link is never invisible.
            strlist.append(
                '<li><a href="%s">%s</a></li>' % (href, title or entry))

    if nested:
        strlist.append('</ul>')
    return strlist
def listpages(rootdir):
    '''Generate the article listing file for ``rootdir``.

    Scans ``rootdir`` with recurse_dir(), renders the result with
    stringize(), feeds it to a Compiler (presumably provided by the
    ``_skel`` star import) and writes ``compiler.strings`` to
    DSTFILENAME inside ``rootdir``.

    Parameters:
        rootdir: directory whose sub-directories hold the articles.
    '''
    # Work with an absolute path so that joining paths onto the root
    # behaves the same for relative and absolute arguments.
    rootdir = os.path.abspath(rootdir)

    contentstrs = ["<h1>Uncontrollable Ramblings</h1><hr />\n"]
    contentstrs.extend(stringize(rootdir, recurse_dir(rootdir), rootdir))

    isi_dict = {
        'JUDULHALAMAN': 'Artikel',
        'ISIHALAMAN': ''.join(contentstrs),
    }
    compiler = Compiler()
    compiler.feed(isi_dict)

    # Open the destination only once the content is ready, so a failure
    # while scanning or compiling does not leave a truncated file behind.
    dstfile = open(os.path.join(rootdir, DSTFILENAME), 'wb')
    try:
        dstfile.writelines(compiler.strings)
    finally:
        dstfile.close()
if __name__ == '__main__':
    # sys.argv[0] is the script itself; only what follows is an argument.
    # Usage: script [SITE_DIR]  -- defaults to the current directory.
    args = sys.argv[1:]
    if args:
        rootdir = os.path.join(os.path.abspath(args[0]), ARTICLEPATH)
    else:
        rootdir = os.path.join(os.path.abspath(os.getcwd()), ARTICLEPATH)
    listpages(rootdir)