'''Automatic article-listing script (by and for primitive people).

This script recursively walks the site directory and lists every article
(files with the .html extension only).

*   Treats the first H1 tag (the largest heading) as the title tag.
*   It is able to extract an article summary as well, but since I have not
    yet found a tag that is both unique and valid, that is not used.
*   Handles nested directories through recursion.

Author          =   Adhi Hargo
Last modified   =   15/01/2007 3:49:42
'''
import sys,os,re
from _skel import *
# Alias for standard output; not referenced in the visible part of this file.
out = sys.stdout

# ==============================================================================
#                           Tag Parser
# ==============================================================================

# Number of bytes gettag() reads from the start of a file and feeds to the
# parser; a tag that starts beyond this offset is never seen.
READSIZE = 4096

from sgmllib import SGMLParser

class TagParser(SGMLParser):
    '''Collect the text of the first non-empty occurrence of one tag.

    After feeding a document, ``data`` holds the text found between the
    first matching start tag and its end tag, or '' when nothing matched.

    Arguments:
        tag         -- name of the element whose text is wanted.
        attrtuples  -- optional; only the first extra argument is used. When
                       it is non-empty, a start tag is accepted only if that
                       value is an element of the attribute list handed to
                       unknown_starttag() (presumably a list of
                       (name, value) pairs -- confirm against sgmllib).
    '''

    def __init__(self, tag, *attrtuples):
        SGMLParser.__init__(self)
        self.tag = tag
        # Fall back to "no filter" instead of raising IndexError when the
        # caller passes only the tag name.
        if attrtuples:
            self.attrs = attrtuples[0]
        else:
            self.attrs = ()

    def reset(self):
        '''Clear parser state, including any text captured so far.'''
        SGMLParser.reset(self)
        self.in_tag = False
        self.data = ''

    def unknown_starttag(self, tag, attrs):
        '''Start capturing when the wanted tag (and filter) matches.'''
        # Ignore other tags, and ignore later occurrences once some text
        # has already been captured.
        if tag != self.tag or self.data:
            return
        if self.attrs and self.attrs not in attrs:
            return
        self.in_tag = True

    def unknown_endtag(self, tag):
        '''Stop capturing at the end tag of the wanted element.'''
        if tag == self.tag and self.in_tag:
            self.in_tag = False

    def handle_data(self, data):
        '''Accumulate text seen while inside the wanted element.'''
        # The text of one element may be delivered in several calls (for
        # example around entity references), so append rather than
        # overwrite; overwriting kept only the last fragment.
        if self.in_tag:
            self.data += data

def gettag(filename, tag, *attrs):
    '''Return the text of the first `tag` element near the top of a file.

    Only the first READSIZE bytes of `filename` are read and parsed, so a
    tag that starts later in the file is not found.

    Arguments:
        filename -- path of the file to inspect.
        tag      -- element name to look for.
        attrs    -- optional extra arguments, forwarded to TagParser as one
                    tuple that it uses as an attribute filter.

    Returns the captured text, or '' when no matching tag was found.
    Errors raised while opening or reading the file propagate to the caller.
    '''
    hf = open(filename, 'rb')
    try:
        head = hf.read(READSIZE)
    finally:
        # Close the handle even when the read fails.
        hf.close()

    tf = TagParser(tag, attrs)
    tf.reset()
    tf.feed(head)
    tf.close()
    return tf.data

# ==============================================================================

# Naive assumption: an article's file name ends in ".html" preceded by at
# least four characters, the first of which is not a dot. Written as a raw
# string so the backslashes reach the regex engine without relying on
# Python passing unknown string escapes through unchanged.
isfinal = re.compile(r"[^\.]...\.html$")
DSTFILENAME = "index.isi"          # Name of the data file that gets written
ARTICLEPATH = "nonkode"            # Sub-directory (of the site) that is scanned

def recurse_dir(dirname, level=0):
    '''Recursively list the article files below `dirname`.

    Arguments:
        dirname -- directory to scan.
        level   -- recursion depth; files are only collected when level > 0,
                   so files directly inside the starting directory are
                   skipped.

    Returns a list that holds first the names of matching files (those
    accepted by `isfinal`) and then, for every sub-directory, a one-entry
    dict mapping the sub-directory name to a tuple with that directory's
    own listing. Names are processed in sorted order so the result does not
    depend on the order in which the operating system reports entries.
    '''
    files = []
    dirs = []
    for name in sorted(os.listdir(dirname)):
        path = os.path.join(dirname, name)
        if os.path.isdir(path):
            dirs.append(name)
        elif (level > 0 and os.path.isfile(path)
                and isfinal.search(name) is not None):
            files.append(name)

    # Sub-directories come after the plain files of this level.
    for name in dirs:
        sublist = recurse_dir(os.path.join(dirname, name), level + 1)
        files.append({name: tuple(sublist)})
    return files

def stringize(dirname, fnames, root, prefix=''):
    '''Render a directory listing as a list of HTML fragments.

    Arguments:
        dirname -- name of the directory being rendered. When it equals
                   `root` the surrounding <ul>/<strong> wrapper is omitted.
        fnames  -- listing in the form produced by recurse_dir(): file names
                   and one-entry dicts {subdirectory: listing}.
        root    -- path of the directory that contains `dirname`.
        prefix  -- path of the containing directory relative to the
                   top-level directory; used to build link targets.
                   Callers outside this function can leave it at ''.

    Returns a list of strings; an empty list when `fnames` is empty.
    '''
    strlist = []
    if not fnames:
        return strlist

    is_subdir = dirname != root
    if is_subdir:
        strlist.append('<ul id="hierarchy">')
        strlist.append('<strong>%s</strong>' % dirname.title())
        # Links must carry every directory between the top level and the
        # file, not just the innermost directory name.
        relbase = os.path.join(prefix, dirname)
    else:
        relbase = prefix
    here = os.path.join(root, dirname)

    for entry in fnames:
        if isinstance(entry, dict):
            for subname in entry:
                strlist.extend(
                    stringize(subname, entry[subname], here, relbase))
            continue
        try:
            # Fall back to the file name when no heading text was found,
            # so the link is never empty.
            title = gettag(os.path.join(here, entry), 'h1') or entry
            href = os.path.join(relbase, entry).replace(os.sep, '/')
            strlist.append('<li><a href="%s">%s</a></li>' % (href, title))
        except Exception:
            # Best effort: a file whose title cannot be extracted is still
            # listed, just without a link.
            strlist.append('<li>%s</li>' % entry)

    if is_subdir:
        strlist.append('</ul>')
    return strlist

def listpages(rootdir):
    '''Build the article index for `rootdir` and write it to DSTFILENAME.

    The directory tree is scanned with recurse_dir(), rendered with
    stringize(), passed to a Compiler instance as the page content, and the
    compiler's output strings are written to <rootdir>/DSTFILENAME.

    Arguments:
        rootdir -- directory holding the articles; relative paths are made
                   absolute first.
    '''
    rootdir = os.path.abspath(rootdir)

    contentstrs = ["<h1>Uncontrollable Ramblings</h1><hr />\n"]
    contentstrs.extend(stringize(rootdir, recurse_dir(rootdir), rootdir))

    isi_dict = {
        'JUDULHALAMAN': 'Artikel',
        'ISIHALAMAN': ''.join(contentstrs),
    }

    # Compiler is not defined in this file; presumably it comes from the
    # star-import of _skel.
    compiler = Compiler()
    compiler.feed(isi_dict)

    # Open the destination only once the content exists, so a failure while
    # scanning or compiling does not leave a truncated file behind.
    dstfile = open(os.path.join(rootdir, DSTFILENAME), 'wb')
    try:
        dstfile.writelines(compiler.strings)
    finally:
        dstfile.close()

if __name__ == '__main__':
    # The optional first command-line argument names the site directory;
    # without it the current working directory is used. sys.argv[0] is the
    # script itself, so it must be skipped before testing for arguments.
    args = sys.argv[1:]
    if args:
        basedir = args[0]
    else:
        basedir = os.getcwd()
    rootdir = os.path.join(os.path.abspath(basedir), ARTICLEPATH)

    listpages(rootdir)