モジュールのコメントを抜き出してdocbook形式で出力する

さて、先日から色々実験していた「モジュールのコメントを抜き出す」ですが、
実はDocBookのXMLを出力することが最終目的でした。
クラスのリファレンスはpydocで出力したものでも十分なんですが、やはり
私にとって不要な情報も多く出力されてしまっているのでちょっとだけ嫌でした。（笑

で、先日までの実験の結果を踏まえて、1つのクラスにまとめてみました。
あ、そうそう、今回はinspectモジュールを使用する方法を使っています。
（よくよく考えると、実際に動く状態のソースのドキュメントを作るケースがほとんどかも。。と思ったので）

__import__()を行うので、モジュールのトップレベルに書かれている処理は実行されてしまう。。

という問題はありますね。
この点は、ゆくゆく改善していく予定です。

で、今回のクラスのソースはこんな感じです。

# -*- coding: utf-8 -*-

import os
import sys
import inspect
from xml.dom.minidom import Document


#
#
class ModuleDocGenerator:
    '''
    Extract docstrings from Python modules and render them as DocBook XML.

    The constructor imports the module(s) found at the given path and builds
    an in-memory description of every module, its classes and their methods.
    Call get_doc_str() to obtain the DocBook XML text.

    Note: modules are loaded with __import__(), so any top-level code of a
    processed module is executed.  The class runs on Python 2.6+ and 3.
    '''
    def __init__(self, modPath):
        '''
        Parse the given module(s) and build the internal documentation data.

        Args:
            modPath: path of the module file to process.  If a directory is
                     given, the .py files below it are processed recursively.
        '''
        modPath = os.path.normpath(os.path.expanduser(modPath))
        # One dictionary per successfully imported module (format described
        # in _parse_module()).
        self.modDictList = []
        self._parse_module(modPath)

    @staticmethod
    def _add_sys_path(dirPath):
        '''Append dirPath to sys.path unless it is already listed.'''
        if dirPath not in sys.path:
            sys.path.append(dirPath)

    @staticmethod
    def _clean_doc(obj):
        '''
        Return the stripped docstring of obj as text, or None if it has none.

        Byte strings (Python 2 str) are decoded as UTF-8; docstrings that are
        already text are passed through, so unicode docstrings do not fail.
        '''
        doc = inspect.getdoc(obj)
        if not doc:
            return None
        if isinstance(doc, bytes):
            doc = doc.decode('utf-8')
        return doc.strip()

    @staticmethod
    def _is_method(obj):
        '''
        Predicate selecting the class members that are documented as methods.

        On Python 3 plain methods looked up on the class are functions, so
        both kinds are accepted (this also includes static methods).
        '''
        return inspect.ismethod(obj) or inspect.isfunction(obj)

    def _parse_module(self, modPath):
        '''
        Import a module file and collect its docstrings into a dictionary.

        Every imported module appends one dictionary of this form to
        self.modDictList:
            {'module': module name,
             'doc': module docstring (or None),
             'classes': [{'cls': class name,
                          'doc': class docstring (or None),
                          'methods': [{'method': method name,
                                       'args': list of argument names,
                                       'doc': method docstring (or None)}]}]}

        Only classes and methods defined in the module itself are recorded;
        names imported from other modules are skipped.

        Args:
            modPath: path of the module file to process.  If a directory is
                     given, its entries are processed recursively (in sorted
                     order) and the directory is added to sys.path.
        '''
        if os.path.isdir(modPath):
            self._add_sys_path(modPath)
            for entry in sorted(os.listdir(modPath)):
                self._parse_module(os.path.join(modPath, entry))
            # A directory is never a module itself, even if its name happens
            # to end in ".py".
            return

        if not os.path.isfile(modPath):
            print('No such module: %s' % modPath)
            return

        if os.path.splitext(modPath)[1] != '.py':
            return

        # __import__() resolves names through sys.path, so the directory that
        # holds the file must be listed there for the import to succeed.
        self._add_sys_path(os.path.dirname(modPath) or os.curdir)

        modName = inspect.getmodulename(modPath)
        try:
            mod = __import__(modName)
        except Exception as err:
            # Best effort: report the failure and carry on with other files.
            print(err)
            return

        # inspect.getargspec() was removed in Python 3.11; prefer the newer
        # function when it exists.  Index 0 of either result is the list of
        # argument names.
        getArgSpec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec

        # Module information
        modDict = {'module': modName,
                   'doc': self._clean_doc(mod),
                   'classes': []}
        self.modDictList.append(modDict)

        # Class information
        for name, cls in inspect.getmembers(mod, inspect.isclass):
            if inspect.getmodule(cls) is not mod:
                continue

            cDict = {'cls': name,
                     'doc': self._clean_doc(cls),
                     'methods': []}
            modDict['classes'].append(cDict)

            # Method information
            for mName, mObj in inspect.getmembers(cls, self._is_method):
                if inspect.getmodule(mObj) is not mod:
                    continue

                cDict['methods'].append({'method': mName,
                                         'args': getArgSpec(mObj)[0],
                                         'doc': self._clean_doc(mObj)})

    @staticmethod
    def _append_text_element(doc, parent, tagName, text):
        '''Append <tagName>text</tagName> to parent and return the new element.'''
        elm = doc.createElement(tagName)
        elm.appendChild(doc.createTextNode(text))
        parent.appendChild(elm)
        return elm

    def get_doc_str(self, chapterName):
        '''
        Build the DocBook XML text for all parsed modules.

        Every line of the returned text starts at column 0 (no indentation).
        Modules without any class are omitted.

        Args:
            chapterName: string used as the id attribute of the chapter.

        Returns:
            The XML document encoded as UTF-8.
        '''
        doc = Document()
        try:
            root = doc.createElement('chapter')
            root.setAttribute('id', chapterName)
            doc.appendChild(root)
            self._append_text_element(doc, root, 'title', u'クラスリファレンス')

            # One <sect1> per module
            for modItem in self.modDictList:
                clsList = modItem['classes']
                if not clsList:
                    continue
                sect1Elm = doc.createElement('sect1')
                root.appendChild(sect1Elm)
                self._append_text_element(doc, sect1Elm, 'title',
                                          modItem['module'])

                # One <sect2> per class
                for clsItem in clsList:
                    sect2Elm = doc.createElement('sect2')
                    sect1Elm.appendChild(sect2Elm)
                    self._append_text_element(doc, sect2Elm, 'title',
                                              clsItem['cls'])
                    if clsItem['doc']:
                        self._append_text_element(doc, sect2Elm, 'para',
                                                  clsItem['doc'])

                    # One <para class="synopsis"> per method, preceded by a
                    # separator comment.
                    for methodItem in clsItem['methods']:
                        sect2Elm.appendChild(
                            doc.createComment('==================='))

                        synopsisPara = doc.createElement('para')
                        synopsisPara.setAttribute('class', 'synopsis')
                        sect2Elm.appendChild(synopsisPara)

                        self._append_text_element(doc, synopsisPara,
                                                  'function',
                                                  methodItem['method'])

                        # Signature without the implicit "self" argument.
                        argStr = ', '.join(a for a in methodItem['args']
                                           if a != 'self')
                        self._append_text_element(
                            doc, synopsisPara, 'synopsis',
                            methodItem['method'] + '(' + argStr + ')')

                        if methodItem['doc']:
                            self._append_text_element(doc, synopsisPara,
                                                      'para',
                                                      methodItem['doc'])

            return doc.toprettyxml(indent='', encoding='utf-8')
        finally:
            # Release the DOM tree even if serialization fails.
            doc.unlink()

#
#
if __name__ == '__main__':
    # Command-line test driver: document the module (or directory) given as
    # the first argument and write the result to class_ref.xml in the current
    # directory.
    if len(sys.argv) < 2:
        print('argument error')
        # Non-zero status so that callers can detect the usage error.
        sys.exit(1)

    gen = ModuleDocGenerator(sys.argv[1])
    xmlDoc = gen.get_doc_str('class_ref')

    try:
        # get_doc_str() returns UTF-8 encoded data, hence binary mode; the
        # with-statement closes the file even if the write fails.
        with open('class_ref.xml', 'wb') as f:
            f.write(xmlDoc)
    except IOError as err:
        print(err)

ModuleDocGeneratorというクラスが今回の主役です。
機能の概要はコメントに書いておきましたが、説明が下手なのでうまく伝わるかどうか。。

出力例ですが、

python moduleDocGen.py moduleDocGen.py

として、自分自身を処理させてみると。。。

クラスリファレンス

moduleDocGen

ModuleDocGenerator

モジュールのドキュメント文字列を抽出するためのモジュール。
生成時、コンストラクタで指定されたモジュールをパースし、内部情報を構築。
get_doc_str()を呼び出すことでdocbook形式のxmlテキストを取得することができる。

__init__

__init__(modPath)

初期化。
引数:
modPath: 処理対象のモジュールファイルパス。
ディレクトリが指定された場合には、再帰的に.pyファイルを処理する。

_parse_module

_parse_module(modPath)

モジュールファイルからドキュメント文字列を取り出し、辞書を構築する。
内部的に構築される辞書の形式：
{'module': モジュール名,
'doc': モジュールのドキュメント文字列
'classes': ['cls': クラス名称,
'doc': クラスのドキュメント文字列,
'methods': [{'method': メソッド名称,
'doc': メソッドのドキュメント文字列}]]}
引数:
modPath: 処理対象のモジュールファイルパス。
ディレクトリが指定された場合には、再帰的に.pyファイルを処理する。

get_doc_str

get_doc_str(chapterName)

docbook形式のxmlテキストを取得する。
このメソッドによって取得できる文字列すべての行が行頭から始まる。
引数:
chapterName: チャプター名文字列。
戻り値:
文字列。

といった出力が得られます。
この出力ファイルを、DocBookのxml中からENTITYとして参照しておいて、
htmlに変換してみると。。。以下のような結果となります。

ちなみに、DocBookのスタイルシート(xsl)とhtmlのスタイルシート(css)も自前のものを使っています。
長いけど。。。どうしようかな。。。
面倒なので、ここに貼り付けてしまいます（笑
冗長な記述が多々あるかもしれませんが、お気づきの点がありましたら、ツッコミお願いします。

まずはxslのファイル

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  version='1.0'>

  <!--
    Customization layer on top of the DocBook XSL HTML stylesheet.
    It overrides the rendering of "mediaobject" and "para" elements; all
    other elements are handled by the imported stylesheet.
  -->

  <!-- NOTE(review): absolute path to a local DocBook XSL 1.74.0 tree;
       adjust it to the local installation before use. -->
  <xsl:import href="/home/kani/work/dev/docbook/docbook-masters/xsl-1.74.0/html/docbook.xsl"/>
  <xsl:output method="html" encoding="UTF-8" indent="yes"/>
  <!-- Parameters of the imported DocBook stylesheet: the CSS file to use
       and the section autolabel switch (see the DocBook XSL parameter
       reference for their exact meaning). -->
  <xsl:param name="html.stylesheet">testSS.css</xsl:param>
  <xsl:param name="section.autolabel" select="1"/>

  <!-- mediaobject: a two-row table of class "figure". Row 1 holds an img
       whose src is imageobject/imagedata/@fileref, row 2 the caption text. -->
  <xsl:template match="mediaobject">
    <table class="figure">
      <tr>
        <td>
          <img>
            <xsl:attribute name="src">
              <xsl:value-of select="./imageobject/imagedata/@fileref"/>
            </xsl:attribute>
          </img>
        </td>
      </tr>
      <tr><td><xsl:value-of select="./caption"/></td></tr>
    </table>
  </xsl:template>

  <!-- para: a para with class='synopsis' becomes a div of class "synopsis"
       showing the text of its synopsis child and of its nested para child,
       each inside a pre element. Any other para becomes a plain p element
       whose content is processed by the normal templates. -->
  <xsl:template match="para">
    <xsl:choose>
      <xsl:when test="./@class='synopsis'">
        <div class="synopsis">
          <!-- The block below is disabled: it would print a heading line
               containing the text of the function child. -->
<!--
          <p><b>書式 : <xsl:value-of select="./function"/></b></p>
-->
          <pre class="synopsis"><xsl:value-of select="./synopsis"/></pre>
          <p><b>説明 :</b></p>
          <!-- NOTE(review): a pre element nested inside a p element is not
               valid HTML; confirm how the target browsers render this. -->
          <p class="description">
            <pre class="description"><xsl:value-of select="./para"/></pre>
          </p>
          <br/>
        </div>
      </xsl:when>
      <xsl:otherwise>
        <p><xsl:apply-templates/></p>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>
</xsl:stylesheet>

次に css

/* -*- coding : utf-8 -*- */

/* Style sheet for the HTML generated from the DocBook sources. */

/* Headings */
h1 {
  margin-left: 1em;
  margin-right: 1em;
  padding-left: 1em;
  line-height: 4em;
  /* Hex colours need the leading '#'; without it the declaration is invalid. */
  color: #f5f5f5;
  background-color: #333344;
  border-style: solid;
}

h2 {
  margin-left: 1em;
  margin-right: 1em;
  padding-left: 0.2em;
  /* Dashed underline only. */
  border-style: hidden hidden dashed hidden;
  border-color: #333366;
  border-width: 1px;
}

h3 {
  margin-left: 2em;
  margin-right: 1em;
  padding-left: 0.2em;
  /* Thick bar on the left side only. */
  border-style: solid;
  border-color: #333366;
  border-width: 0px 0px 0px 8px;
}

h4, h5, h6 {
  margin-left: 3em;
  margin-right: 1em;
  padding-left: 0.2em;
}

/* Body text */
p {
  margin-left: 5em;
}

/* Tables ("boder" was a typo for "border", so the declarations were ignored) */
table {
  margin-left: 5em;
  border: solid 1px;
  empty-cells: hide;
  border-collapse: collapse;
}

thead {
  line-height: 1.5em;
  border: solid 1px;
  empty-cells: hide;
  border-collapse: collapse;
  background-color: #dcdcdc;
}

tr, td {
  border: solid 1px #000000;
  padding-left: 0.5em;
  padding-right: 0.5em;
}

li {
  margin-left: 4em;
}

/* Table emitted for mediaobject elements */
.figure {
  text-align: center;
}

/* Code listings */
pre.code {
  margin-left: 2em;
  padding: 1em;
  background-color: #f0f8ff;
  border: solid 1px;
  border-color: #b0c4de;
}

pre.programlisting {
  margin-left: 5em;
  margin-right: 4em;
  padding: 1em;
  background-color: #f0f8ff;
  border: solid 1px;
  border-color: #b0c4de;
}

/* Method signature box of the class reference */
pre.synopsis {
  margin-left: 5em;
  margin-right: 4em;
  padding-left: 1em;
  font-weight: bold;
  line-height: 2em;
  border: solid 1px;
  border-color: #b0c4de;
  color: #000000;
  background-color: #778899;
}

/* Method description of the class reference */
pre.description {
  margin-left: 7em;
  margin-right: 4em;
}

p.description {
  margin-right: 4em;
}

div.mediaobject {
  margin-left: 5em;
  padding: 1em;
}