User:Wmrwiki/导入xml文件/py

維基文庫,自由的圖書館
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from xml.dom.minidom import *
import datetime

def create():
    """Return a new minidom Document whose root is an empty <mediawiki> element.

    The root element is given the namespace, schema-location, version and
    language attributes listed below (export schema 0.3, language 'zh').
    """
    # Kept as an ordered sequence so attributes are set in a fixed order.
    root_attributes = (
        ('xmlns', 'http://www.mediawiki.org/xml/export-0.3/'),
        ('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance'),
        ('xsi:schemaLocation',
         'http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd'),
        ('version', '0.3'),
        ('xml:lang', 'zh'),
    )

    document = Document()
    root = document.createElement('mediawiki')
    for attr_name, attr_value in root_attributes:
        root.setAttribute(attr_name, attr_value)
    document.appendChild(root)
    return document

def _text_element(doc, tag, value):
    """Return a new <tag> element whose only child is a text node of *value*."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(value))
    return element


def append(doc, title, contributor, text, time=None):
    """Append a <page> with a single <revision> to the <mediawiki> element.

    The generated structure is::

        <page>
          <title>title</title>
          <revision>
            <timestamp>time</timestamp>
            <contributor><username>contributor</username></contributor>
            <text>text</text>
          </revision>
        </page>

    Args:
        doc: minidom Document containing at least one <mediawiki> element;
            the page is added to the first one found.
        title: Page title.
        contributor: User name recorded for the revision.
        text: Wikitext of the revision.
        time: Revision timestamp string. When omitted (None), the current
            UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ`` is used.

    Raises:
        IndexError: If *doc* has no <mediawiki> element.
    """
    # Resolve the default per call. Evaluating utcnow() in the signature
    # would freeze the timestamp at the moment the module was imported.
    if time is None:
        time = datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')

    author = doc.createElement('contributor')
    author.appendChild(_text_element(doc, 'username', contributor))

    revision = doc.createElement('revision')
    revision.appendChild(_text_element(doc, 'timestamp', time))
    revision.appendChild(author)
    revision.appendChild(_text_element(doc, 'text', text))

    page = doc.createElement('page')
    page.appendChild(_text_element(doc, 'title', title))
    page.appendChild(revision)

    doc.getElementsByTagName('mediawiki')[0].appendChild(page)

def write(doc, fn, encoding='utf8'):
    """Serialize *doc* to XML and save it to the file *fn*.

    Args:
        doc: minidom Document to serialize.
        fn: Path of the output file; an existing file is overwritten.
        encoding: Encoding passed to ``doc.toxml``; it is also named in the
            XML declaration. If None, the document is written as UTF-8
            without an encoding declaration.
    """
    # Serialize first so a failure here does not truncate an existing file.
    data = doc.toxml(encoding)
    if not isinstance(data, bytes):
        # toxml() yields text when no encoding is given; UTF-8 is the XML
        # default for documents that carry no encoding declaration.
        data = data.encode('utf-8')
    # toxml(encoding) yields already-encoded bytes, so the file must be
    # opened in binary mode; 'with' closes it even if the write fails.
    with open(fn, 'wb') as f:
        f.write(data)

def main():
    """Build a sample export holding two identical pages and save it.

    The result is written to 'to_import.xml' in the current directory.
    """
    export = create()
    # (title, contributor, text) of the sample page; it is added twice.
    sample_page = (u'Wikisource:沙盒’', u'Liangent', u'新的<>测\n试')
    for _ in range(2):
        append(export, *sample_page)
    write(export, 'to_import.xml')

# Run the sample export only when executed as a script, not when imported.
if __name__ == '__main__':
    main()