#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
A Python module which can parse ChangeLog formatted documents.

A document is a sequence of entries.  Each entry starts with a head line
(``YYYY-MM-DD`` optionally followed by `` (Day)`` and then the author) and
contains items.  An item starts with a line of the form
``<TAB>* title [tag1][tag2]: description``; every following line that is
neither a head line nor an item line is appended to the item's content.
"""
import io
import re

__author__ = 'mattn '
__url__ = 'http://mattn.kaoriya.net/'
__date__ = 'Tue, 24 Jun 2008'
__version__ = '0.1'

# Head line: date, optional " (Day)" suffix, then the author.
_RE_HEAD = re.compile(r'^(\d{4}-\d{2}-\d{2}(?: \(.{3}\))?)\s*(.*)$')
# Item line: tab, "*", title, optional run of "[tag]" groups, ":", rest.
# The tag group only accepts consecutive "[...]" groups so that a "]:" later
# in the description is not swallowed into the tags.
_RE_ITEM = re.compile(r'^\t\*(.*?)((?:\[[^\]]*\])+)?:(.*)$')
# A single "[tag]" group; captures the text between the brackets.
_RE_TAG = re.compile(r'\[([^\]]*)\]')


def _split_tags(tags):
    """Return the list of tag names contained in *tags*.

    *tags* may be None/empty, a single string such as ``"[a][b]"``, or a
    list/tuple of such strings.
    """
    if not tags:
        return []
    if not isinstance(tags, (list, tuple)):
        tags = [tags]
    names = []
    for raw in tags:
        # Unbracketed values fall back to the historical behaviour of
        # dropping the first and last character.
        names.extend(_RE_TAG.findall(raw) or [raw[1:-1]])
    return names


class Item(object):
    """
    ChangeLog item object.
    You can access the attributes title, description, content and tags.
    """

    def __init__(self, title, description="", content="", tags=None):
        """
        Initialize the item object.
        Normally you won't call this yourself; parse() creates items.

        title       -- item title; surrounding whitespace is stripped.
        description -- text following the ':' on the item line; stripped.
        content     -- body text; stripped.
        tags        -- "[a][b]" style string, or a list of such strings.
        """
        self.title = title.strip() if title else ""
        self.description = description.strip() if description else ""
        self.content = content.strip() if content else ""
        self.tags = _split_tags(tags)


class Entry(object):
    """
    ChangeLog entry object.
    You can access the attributes date, author and items.
    """

    def __init__(self, date, author):
        """
        Initialize the entry object.
        Normally you won't call this yourself; parse() creates entries.

        date   -- date text of the head line; stripped, None becomes "".
        author -- author text of the head line; stripped, None becomes "".
        """
        self.date = date.strip() if date else ""
        self.author = author.strip() if author else ""
        self.items = []


def parse(filename, encoding="utf-8"):
    """
    Parse a ChangeLog formatted document.

    filename -- path of the file to read.
    encoding -- text encoding used to decode the file (default "utf-8").

    Returns a list of Entry objects in file order.  Lines that appear
    before the first head line are ignored.
    """
    entries = []
    entry = None
    item = None
    # io.open decodes with the requested encoding on both Python 2 and 3,
    # and the with-block guarantees the file is closed.
    with io.open(filename, encoding=encoding) as stream:
        for line in stream:
            head = _RE_HEAD.match(line)
            if head:
                # A new entry begins: flush the previous one first.
                if entry:
                    if item:
                        entry.items.append(item)
                    entries.append(entry)
                entry = Entry(date=head.group(1), author=head.group(2))
                item = None
            elif entry:
                found = _RE_ITEM.match(line)
                if found:
                    if item:
                        entry.items.append(item)
                    item = Item(title=found.group(1),
                                tags=found.group(2),
                                description=found.group(3))
                elif item:
                    # Continuation line: drop the first character (the
                    # leading tab) and keep the rest, newline included.
                    item.content += line[1:]
    if entry:
        if item:
            entry.items.append(item)
        entries.append(entry)
    return entries


if __name__ == '__main__':
    entries = parse("ChangeLog")
    for entry in entries:
        print("date: %s" % entry.date)
        print("author: %s" % entry.author)
        for item in entry.items:
            print(" title: %s" % item.title)
            print(" tags: %s" % ",".join(item.tags))
            # Content may be empty; show an empty first word in that case.
            words = item.content.split()
            print(" content: %3s..." % (words[0] if words else ""))