Utilizador:MalafayaBot: diferenças entre revisões
Conteúdo apagado Conteúdo adicionado
m r2.7.5) (Robô: A adicionar: no:Bruker:MalafayaBot |
migração para pywikibot-core |
||
Linha 129: | Linha 129: | ||
#!/usr/bin/env python |
#!/usr/bin/env python |
||
# -*- coding: UTF-8 -*- |
# -*- coding: UTF-8 -*- |
||
import wikipedia |
|||
import sys |
import sys |
||
import re |
import re |
||
Linha 136: | Linha 135: | ||
import codecs |
import codecs |
||
import query |
import query |
||
import config |
from pywikibot import config |
||
site = None |
site = None |
||
Linha 144: | Linha 143: | ||
# Log bot warnings |
# Log bot warnings |
||
def log(message): |
def log(message): |
||
page = |
page = pywikibot.Page(site, u"Usuário:MalafayaBot/Log") |
||
if page.exists(): |
if page.exists(): |
||
text = page.get() |
text = page.get() |
||
Linha 151: | Linha 150: | ||
text += u"\r\n* ~~~~~: " + message |
text += u"\r\n* ~~~~~: " + message |
||
page.put(text, "Mensagem de log do bot") |
page.put(text, "Mensagem de log do bot") |
||
# Gets the base language category for a language code |
# Gets the base language category for a language code |
||
def getLangCat(langcode): |
def getLangCat(langcode): |
||
params = { |
params = { |
||
'action': 'parse', |
'action' : 'parse', |
||
'text' : u'{{nome categoria|%s}}' % langcode, |
'text' : u'{{nome categoria|%s}}' % langcode, |
||
' |
'contentmodel' : 'wikitext', |
||
'prop' : 'text', |
|||
} |
} |
||
datas = query.GetData(params, site) |
datas = query.GetData(params, site) |
||
Linha 163: | Linha 163: | ||
mo = reparsedtext.match(data) |
mo = reparsedtext.match(data) |
||
return mo.group(1).strip() |
return mo.group(1).strip() |
||
def createFLentry(transl, langcode, pos, title, gloss): |
def createFLentry(transl, langcode, pos, title, gloss): |
||
# Build a page |
# Build a page |
||
page = |
page = pywikibot.Page(site, transl) |
||
if page.exists(): |
if page.exists(): |
||
if page.isRedirectPage(): |
if page.isRedirectPage(): |
||
pywikibot.output(u"Page '%s' is redirect. Skipping" % transl) |
|||
log(u"Tentativa de criação de entrada da língua '''%s''' sobre redirecionamento \"%s\"" % |
log(u"Tentativa de criação de entrada da língua '''%s''' sobre redirecionamento \"%s\"" % |
||
(langcode, transl)) |
(langcode, transl)) |
||
return True # meaning there is a page there now |
return True # meaning there is a page there now |
||
if not page.isEmpty(): |
if not page.isEmpty(): |
||
pywikibot.output(u"Page '%s' already has contents. Skipping" % transl) |
|||
return True # meaning there is a page there now |
return True # meaning there is a page there now |
||
# check language section later ... |
# check language section later ... |
||
# Decap gloss (some people insist on capitalizing it, which is wrong) this is almost always right: |
# Decap gloss (some people insist on capitalizing it, which is wrong) this is almost always right: |
||
gl = gloss.lower() |
gl = gloss.lower() |
||
Linha 185: | Linha 185: | ||
return False |
return False |
||
if gl[1:] != gloss[1:]: gl = gloss # caps in string after first, so probably okay |
if gl[1:] != gloss[1:]: gl = gloss # caps in string after first, so probably okay |
||
langcat = getLangCat(langcode) |
langcat = getLangCat(langcode) |
||
pywikibot.output(u"Language category returned is %s" % langcat) |
|||
# Check if base language category exists (we don't want to add words for languages that are not yet cataloged or whose name is not well specified) |
# Check if base language category exists (we don't want to add words for languages that are not yet cataloged or whose name is not well specified) |
||
langCatPage = |
langCatPage = pywikibot.Page(site, u"Categoria:%s" % langcat) |
||
if not langCatPage.exists(): |
if not langCatPage.exists(): |
||
pywikibot.output(u"Base language category 'Categoria:%s' does not yet exist" % langcat) |
|||
log(u"Categoria base para língua '''%s''' com nome 'Categoria:%s' não existe. Entrada '%s' não adicionada" % |
log(u"Categoria base para língua '''%s''' com nome 'Categoria:%s' não existe. Entrada '%s' não adicionada" % |
||
(langcode, langcat, transl)) |
(langcode, langcat, transl)) |
||
return False |
return False |
||
text = u"""={{-%s-}}= |
text = u"""={{-%s-}}= |
||
==%s== |
==%s== |
||
Linha 210: | Linha 210: | ||
else: |
else: |
||
text = text % (langcode, pos, transl, title, u' (' + gl + u')', title, langcode, pos, langcat) |
text = text % (langcode, pos, transl, title, u' (' + gl + u')', title, langcode, pos, langcat) |
||
try: |
try: |
||
page.put(text, comment = u"Criada automaticamente a partir das traduções em [[%s]]" % title, minorEdit = False) |
page.put(text, comment = u"Criada automaticamente a partir das traduções em [[%s]]" % title, minorEdit = False) |
||
except |
except pywikibot.PageNotSaved: |
||
print "Failed to save page" |
print "Failed to save page" |
||
return False |
return False |
||
Linha 222: | Linha 222: | ||
print "Socket error, maybe not saving page" |
print "Socket error, maybe not saving page" |
||
return False |
return False |
||
newCat = |
newCat = pywikibot.Page(site, u"Categoria:!Entrada criada por robô (%s)" % langcat) |
||
if not newCat.exists() or (not newCat.isRedirectPage() and newCat.isEmpty()): |
if not newCat.exists() or (not newCat.isRedirectPage() and newCat.isEmpty()): |
||
# Create the auto pages category for this language |
# Create the auto pages category for this language |
||
pywikibot.output(u"Creating auto page category for language '%s'" % langcode) |
|||
newCat.put(u"{{catpagautolíngua|%s}}" % langcode, u"Criada automaticamente") |
newCat.put(u"{{catpagautolíngua|%s}}" % langcode, u"Criada automaticamente") |
||
return True |
return True |
||
# Converts the Part of Speech to the AO1990 |
# Converts the Part of Speech to the AO1990 |
||
def convertOrtography(pos): |
def convertOrtography(pos): |
||
Linha 237: | Linha 237: | ||
return 'Adjetivo' |
return 'Adjetivo' |
||
return pos |
return pos |
||
def main(): |
def main(): |
||
global repact |
global repact, site |
||
socket.setdefaulttimeout(30) |
socket.setdefaulttimeout(30) |
||
pageToProcess = None |
pageToProcess = None |
||
for arg in sys.argv[1:]: |
for arg in sys.argv[1:]: |
||
if arg.startswith('-'): |
if arg.startswith('-'): |
||
print 'Arguments not supported yet' |
print 'Arguments not supported yet' |
||
else: pageToProcess = unicode(arg, 'latin1') |
else: pageToProcess = unicode(arg, 'latin1') |
||
# make sure we are logged in |
# make sure we are logged in |
||
site = |
site = pywikibot.getSite() |
||
site.forceLogin() |
site.forceLogin() |
||
config.put_throttle = 1 |
config.put_throttle = 1 |
||
rehead = re.compile(r'={1,4}(.+?)={1,4}') |
rehead = re.compile(r'={1,4}(.+?)={1,4}') |
||
rehead2 = re.compile(r'={2}(.+?)={2}') |
rehead2 = re.compile(r'={2}(.+?)={2}') |
||
Linha 267: | Linha 267: | ||
reglosstune = re.compile(r'(.*?)\((.*?)\)') |
reglosstune = re.compile(r'(.*?)\((.*?)\)') |
||
reglosstune2 = re.compile(r'(.*?):\s?(.*)') |
reglosstune2 = re.compile(r'(.*?):\s?(.*)') |
||
partsOfSpeech = set(['Substantivo', 'Adjetivo', 'Verbo', 'Pronome', 'Locução substantiva', 'Numeral']) |
partsOfSpeech = set(['Substantivo', 'Adjetivo', 'Verbo', 'Pronome', 'Locução substantiva', 'Numeral']) |
||
stops = set([]) |
stops = set([]) |
||
if (pageToProcess == None): |
if (pageToProcess == None): |
||
entry = site.randompage() |
entry = site.randompage() |
||
pageToProcess = entry.title() |
pageToProcess = entry.title() |
||
else: |
else: |
||
entry = |
entry = pywikibot.Page(site, pageToProcess) |
||
pywikibot.output(u"Getting page '%s'" % entry.title()) |
|||
if entry.namespace() != 0: |
if entry.namespace() != 0: |
||
pywikibot.output(u"Not an article") |
|||
return |
return |
||
text = entry.get() |
text = entry.get() |
||
if not u'=Português=' in text and not u'={{pt}}=' in text and not u'={{-pt-}}=' in text and not u'= Português =' in text and not u'= {{pt}} =' in text and not u'= {{-pt-}} =' in text: |
if not u'=Português=' in text and not u'={{pt}}=' in text and not u'={{-pt-}}=' in text and not u'= Português =' in text and not u'= {{pt}} =' in text and not u'= {{-pt-}} =' in text: |
||
pywikibot.output(u'No appropriate PT language header') |
|||
return |
return |
||
""" if not u'==Tradução==' in text and not u'==Traduções==' in text and not u'=={{tradu}}==' in text: |
""" if not u'==Tradução==' in text and not u'==Traduções==' in text and not u'=={{tradu}}==' in text: |
||
pywikibot.output(u'No appropriate Tradução language header') |
|||
return""" |
return""" |
||
lines = text.splitlines() |
lines = text.splitlines() |
||
intrans = False |
intrans = False |
||
Linha 296: | Linha 296: | ||
if mo: |
if mo: |
||
header = mo.group(1).strip() |
header = mo.group(1).strip() |
||
pywikibot.output(u'Current header: %s' % header) |
|||
if header == u"Tradução" or header == u'{{tradução}}' or header == u'Traduções': |
if header == u"Tradução" or header == u'{{tradução}}' or header == u'Traduções': |
||
if not (rehead3.match(lines[i])): |
if not (rehead3.match(lines[i])): |
||
pywikibot.output(u'Header not on level 3: skipping') |
|||
return |
return |
||
intrans = True |
intrans = True |
||
pywikibot.output(u'INTRANS') |
|||
gloss = '' |
gloss = '' |
||
else: intrans = False |
else: intrans = False |
||
if header in partsOfSpeech and rehead2.match(lines[i]): |
if header in partsOfSpeech and rehead2.match(lines[i]): |
||
pos = convertOrtography(header) |
pos = convertOrtography(header) |
||
pywikibot.output("PoS: %s" % pos) |
|||
if header in stops and rehead2.match(lines[i]): pos = '' |
if header in stops and rehead2.match(lines[i]): pos = '' |
||
continue |
continue |
||
if not intrans: continue |
if not intrans: continue |
||
mo = regloss.match(lines[i]) |
mo = regloss.match(lines[i]) |
||
if mo: |
if mo: |
||
Linha 323: | Linha 323: | ||
if mo: |
if mo: |
||
gloss = mo.group(2).strip() |
gloss = mo.group(2).strip() |
||
pywikibot.output("Gloss: %s" % gloss) |
|||
continue |
continue |
||
# Try all the possible translation variants |
# Try all the possible translation variants |
||
transls = [] |
transls = [] |
||
Linha 332: | Linha 332: | ||
lang = mo.group(1).strip() |
lang = mo.group(1).strip() |
||
transls += mo.group(2).strip().split('|') |
transls += mo.group(2).strip().split('|') |
||
mo = retrans2.match(lines[i]) |
mo = retrans2.match(lines[i]) |
||
if mo: |
if mo: |
||
lang = mo.group(2).strip() |
lang = mo.group(2).strip() |
||
transls += mo.group(3).strip().split('|') |
transls += mo.group(3).strip().split('|') |
||
mo = retrans3.match(lines[i]) |
mo = retrans3.match(lines[i]) |
||
if mo: |
if mo: |
||
lang = mo.group(1).strip() |
lang = mo.group(1).strip() |
||
transls += mo.group(2).strip().split('|') |
transls += mo.group(2).strip().split('|') |
||
mo = retrans4.match(lines[i]) |
mo = retrans4.match(lines[i]) |
||
if mo: |
if mo: |
||
lang = mo.group(2).strip() |
lang = mo.group(2).strip() |
||
transls += mo.group(3).strip().split('|') |
transls += mo.group(3).strip().split('|') |
||
mo = retrans5.match(lines[i]) |
mo = retrans5.match(lines[i]) |
||
if mo: |
if mo: |
||
lang = mo.group(2).strip() |
lang = mo.group(2).strip() |
||
transls += mo.group(3).strip().split('|') |
transls += mo.group(3).strip().split('|') |
||
if len(transls) == 0: continue |
if len(transls) == 0: continue |
||
pywikibot.output(u"Found translations '%s' for language '%s'" % (transls, lang)) |
|||
if lang in ignoreLangs: |
if lang in ignoreLangs: |
||
pywikibot.output(u"Skipping translations for ignored language '%s'" % lang) |
|||
continue |
continue |
||
for transl in transls: |
for transl in transls: |
||
if len(transl) > 0: |
if len(transl) > 0: |
||
createFLentry(transl, lang, pos, entry.title(), gloss) |
createFLentry(transl, lang, pos, entry.title(), gloss) |
||
pywikibot.output(u'Done %s' % pageToProcess); |
|||
if __name__ == "__main__": |
if __name__ == "__main__": |
||
try: |
try: |
||
main() |
main() |
||
finally: |
finally: |
||
pywikibot.stopme() |
|||
</source> |
</source> |
||
Revisão das 16h45min de 13 de agosto de 2015
Esta conta de usuário destina-se a realizar as operações do robô controlado por Malafaya (discussão). Não é uma conta fantoche (sock puppet), mas uma conta automática ou semi-automática para fazer edições repetitivas. Administrador: se este robô apresentar mau funcionamento ou causar problemas, bloqueie-o e avise o operador responsável. Status de robô: verificar se está marcado como robô | Pedido: ver página do pedido de aprovação |
Cabeçalhos de idioma
replace.py -regex -ns:0 -always -pt:1 -multiline -catr:Papiamento "^\=(\s*?)(\{\{pap\}\}|Papiamento)(\s*?)\=" "={{-pap-}}="
Shortcut
# Register the same account name for every Wiktionary language edition listed.
# NOTE(review): presumably meant to be pasted into pywikibot's user-config.py,
# where `usernames` is already defined -- confirm before use.
for _code in (
        'af', 'ang', 'an', 'ar', 'ast', 'ay', 'az', 'be', 'bs', 'br', 'bg',
        'ca', 'co', 'cs', 'csb', 'cy', 'da', 'de', 'et', 'el', 'es', 'eo',
        'en', 'eu', 'fa', 'fi', 'fo', 'fr', 'fy', 'ga', 'gv', 'gl', 'gu',
        'ka', 'kl', 'km', 'kn', 'ko', 'he', 'hi', 'hr', 'hsb', 'hy', 'id',
        'io', 'is', 'it', 'iu', 'ja', 'kk', 'ku', 'kw', 'ky', 'lo', 'la',
        'lb', 'ln', 'lt', 'li', 'lv', 'hu', 'mg', 'ml', 'mk', 'mn', 'ms',
        'my', 'na', 'nah', 'nds', 'ne', 'nl', 'no', 'oc', 'om', 'pa', 'pl',
        'pt', 'ro', 'roa-rup', 'ru', 'scn', 'sh', 'si', 'simple', 'sk', 'sl',
        'sq', 'sr', 'su', 'sv', 'ta', 'te', 'tg', 'th', 'tl', 'tk', 'tpi',
        'tr', 'tt', 'ug', 'uk', 'uz', 'vi', 'vo', 'wa', 'yi', 'za', 'zh',
        'zh-min-nan', 'zu',
):
    usernames['wiktionary'][_code] = 'Malafaya'
# Do not leave the loop variable behind in the namespace this snippet runs in.
del _code
Tbot.pt
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import sys
import re
import socket
import pywikibot
import codecs
import query
from pywikibot import config
# Wiki site shared by every function in this script; stays None until main()
# assigns it from pywikibot.getSite().
site = None
# Matches a leading <p>...</p> wrapper and captures its contents; getLangCat()
# applies it to the HTML returned by the parse API.
reparsedtext = re.compile(r'<p>(.*)</p>')
# Language codes whose translations main() skips.
ignoreLangs = ['ine', 'ine-pro']
# Log bot warnings
def log(message):
    """Append *message* as a new bullet to the bot's on-wiki log page.

    The current text of ``Usuário:MalafayaBot/Log`` is used when the page
    exists (otherwise the log starts empty); a bullet made of five tildes
    (presumably expanded by MediaWiki to a timestamp) followed by the message
    is appended and the page is saved.
    """
    logPage = pywikibot.Page(site, u"Usuário:MalafayaBot/Log")
    previous = logPage.get() if logPage.exists() else ''
    logPage.put(previous + u"\r\n* ~~~~~: " + message, "Mensagem de log do bot")
# Gets the base language category for a language code
def getLangCat(langcode):
    """Return the base category name for the language code *langcode*.

    The wikitext ``{{nome categoria|<langcode>}}`` is sent to the API's
    ``parse`` action and the stripped contents of the leading ``<p>...</p>``
    of the rendered HTML are returned.
    """
    response = query.GetData({
        'action' : 'parse',
        'text' : u'{{nome categoria|%s}}' % langcode,
        'contentmodel' : 'wikitext',
        'prop' : 'text',
    }, site)
    rendered = response['parse']['text'].values()[0]
    # No guard on purpose (unchanged behaviour): if the HTML does not start
    # with <p>, match() returns None and the .group call raises AttributeError.
    return reparsedtext.match(rendered).group(1).strip()
def createFLentry(transl, langcode, pos, title, gloss):
    """Create a stub foreign-language entry for the translation *transl*.

    transl   -- title of the page to create
    langcode -- language code of the translation
    pos      -- part-of-speech header to use in the new entry
    title    -- title of the entry the translation was found on
    gloss    -- gloss of the translation table ('' when there is none)

    Returns True when a page is present at *transl* afterwards (it was
    already a redirect or had contents, or it has just been saved) and False
    when nothing was created (gloss mentions "translation", base language
    category missing, or saving failed).
    """
    # Build a page
    target = pywikibot.Page(site, transl)
    if target.exists():
        if target.isRedirectPage():
            pywikibot.output(u"Page '%s' is redirect. Skipping" % transl)
            log(u"Tentativa de criação de entrada da língua '''%s''' sobre redirecionamento \"%s\"" %
                (langcode, transl))
            return True  # meaning there is a page there now
        if not target.isEmpty():
            pywikibot.output(u"Page '%s' already has contents. Skipping" % transl)
            return True  # meaning there is a page there now
        # check language section later ...

    # Decap gloss (some people insist on capitalizing it, which is wrong);
    # this is almost always right:
    glossText = gloss.lower()
    if "translation" in glossText:
        log("word 'translation' in gloss, skipped")
        return False
    if glossText[1:] != gloss[1:]:
        # caps in string after first, so probably okay
        glossText = gloss

    langcat = getLangCat(langcode)
    pywikibot.output(u"Language category returned is %s" % langcat)

    # Check if base language category exists (we don't want to add words for
    # languages that are not yet cataloged or whose name is not well specified)
    if not pywikibot.Page(site, u"Categoria:%s" % langcat).exists():
        pywikibot.output(u"Base language category 'Categoria:%s' does not yet exist" % langcat)
        log(u"Categoria base para língua '''%s''' com nome 'Categoria:%s' não existe. Entrada '%s' não adicionada" %
            (langcode, langcat, transl))
        return False

    template = u"""={{-%s-}}=
==%s==
'''%s'''
# [[%s]]%s
{{página-automática|{{subst:CURRENTMONTHNAME}}|{{subst:CURRENTYEAR}}|%s|%s}}
[[Categoria:%s (%s)]]
"""
    # The gloss, when present, follows the link in parentheses.
    glossSuffix = '' if glossText == '' else u' (' + glossText + u')'
    text = template % (langcode, pos, transl, title, glossSuffix, title, langcode, pos, langcat)

    try:
        target.put(text, comment = u"Criada automaticamente a partir das traduções em [[%s]]" % title, minorEdit = False)
    except pywikibot.PageNotSaved:
        print("Failed to save page")
        return False
    except socket.timeout:
        print("Socket timeout, maybe not saving page")
        return False
    except socket.error:
        print("Socket error, maybe not saving page")
        return False

    autoCat = pywikibot.Page(site, u"Categoria:!Entrada criada por robô (%s)" % langcat)
    if autoCat.exists() and (autoCat.isRedirectPage() or not autoCat.isEmpty()):
        # The category page is already in place (or is a redirect).
        return True
    # Create the auto pages category for this language
    pywikibot.output(u"Creating auto page category for language '%s'" % langcode)
    autoCat.put(u"{{catpagautolíngua|%s}}" % langcode, u"Criada automaticamente")
    return True
# Converts the Part of Speech to the AO1990
def convertOrtography(pos):
    """Return *pos* in its AO1990 spelling.

    Only 'Adjectivo' is rewritten (to 'Adjetivo'); any other value is
    returned unchanged.
    """
    return 'Adjetivo' if pos == 'Adjectivo' else pos
def main():
    """Create stub entries for the translations listed on one entry.

    The entry title is the last command-line argument that does not start
    with '-' (decoded as latin1); without one, a random page is used. The
    page must be in namespace 0 and contain a Portuguese language header.
    Its lines are scanned: headers update the current part of speech and
    whether a translation section is active, ``{{tradini|...}}`` lines set the
    current gloss, and every translation found in a translation section is
    passed to createFLentry().

    Translations met before any recognised part-of-speech header are skipped
    instead of failing on an unassigned variable.
    """
    global site
    socket.setdefaulttimeout(30)

    pageToProcess = None
    for arg in sys.argv[1:]:
        if arg.startswith('-'):
            print('Arguments not supported yet')
        else:
            pageToProcess = unicode(arg, 'latin1')

    # make sure we are logged in
    site = pywikibot.getSite()
    site.forceLogin()
    config.put_throttle = 1

    rehead = re.compile(r'={1,4}(.+?)={1,4}')
    rehead2 = re.compile(r'={2}(.+?)={2}')
    rehead3 = re.compile(r'={3}(.+?)={3}')
    regloss = re.compile(r'\{\{tradini\|(.*?)}}')
    reglosstune = re.compile(r'(.*?)\((.*?)\)')
    reglosstune2 = re.compile(r'(.*?):\s?(.*)')
    # Every supported translation template as
    # (pattern, group holding the language code, group holding the translations).
    # All patterns are tried on each line, in this order.
    translationPatterns = [
        (re.compile(r'\*\s?\{\{trad\|(.*?)\|(.*?)}}'), 1, 2),
        (re.compile(r'\*(.*?)\s?\{\{xlatio\|(.*?)\|(.*?)(\|.*)?}}'), 2, 3),
        (re.compile(r'\*\s?\{\{trad-\|(.*?)\|(.*?)(\|.*)?}}'), 1, 2),
        (re.compile(r'\*(.*?)\s?\{\{t\|(.*?)\|(.*?)(\|.*)?}}'), 2, 3),
        (re.compile(r'\*(.*?)\s?\{\{t\+\|(.*?)\|(.*?)(\|.*)?}}'), 2, 3),
    ]

    # Unicode literals: headers come from unicode page text, and under
    # Python 2 a non-ASCII byte string (e.g. 'Locução substantiva') never
    # compares equal to its unicode counterpart.
    partsOfSpeech = set([u'Substantivo', u'Adjetivo', u'Verbo', u'Pronome',
                         u'Locução substantiva', u'Numeral'])
    stops = set([])

    if pageToProcess is None:
        entry = site.randompage()
        pageToProcess = entry.title()
    else:
        entry = pywikibot.Page(site, pageToProcess)

    pywikibot.output(u"Getting page '%s'" % entry.title())
    if entry.namespace() != 0:
        pywikibot.output(u"Not an article")
        return

    text = entry.get()
    ptHeaders = (u'=Português=', u'={{pt}}=', u'={{-pt-}}=',
                 u'= Português =', u'= {{pt}} =', u'= {{-pt-}} =')
    if not any(ptHeader in text for ptHeader in ptHeaders):
        pywikibot.output(u'No appropriate PT language header')
        return

    # NOTE: a similar early exit for pages without a Tradução / Traduções /
    # {{tradu}} header was disabled here in the original script.

    intrans = False
    pos = ''    # no part-of-speech header seen yet
    gloss = ''
    for line in text.splitlines():
        mo = rehead.match(line)
        if mo:
            header = mo.group(1).strip()
            pywikibot.output(u'Current header: %s' % header)
            if header in (u"Tradução", u'{{tradução}}', u'Traduções'):
                if not rehead3.match(line):
                    pywikibot.output(u'Header not on level 3: skipping')
                    return
                intrans = True
                pywikibot.output(u'INTRANS')
                gloss = ''
            else:
                intrans = False
            if header in partsOfSpeech and rehead2.match(line):
                pos = convertOrtography(header)
                pywikibot.output("PoS: %s" % pos)
            if header in stops and rehead2.match(line):
                pos = ''
            continue

        if not intrans:
            continue

        mo = regloss.match(line)
        if mo:
            gloss = mo.group(1).strip()
            # Reduce "text (gloss)" or "text: gloss" to the gloss part.
            tuned = reglosstune.match(gloss) or reglosstune2.match(gloss)
            if tuned:
                gloss = tuned.group(2).strip()
            pywikibot.output("Gloss: %s" % gloss)
            continue

        # Try all the possible translation variants
        transls = []
        lang = None
        for pattern, langGroup, translGroup in translationPatterns:
            mo = pattern.match(line)
            if mo:
                lang = mo.group(langGroup).strip()
                transls += mo.group(translGroup).strip().split('|')
        if not transls:
            continue

        pywikibot.output(u"Found translations '%s' for language '%s'" % (transls, lang))
        if lang in ignoreLangs:
            pywikibot.output(u"Skipping translations for ignored language '%s'" % lang)
            continue
        if not pos:
            # Without a part of speech the new entry would get an empty
            # section header and category.
            pywikibot.output(u"No part of speech known for translations of language '%s': skipping" % lang)
            continue

        for transl in transls:
            if transl:
                createFLentry(transl, lang, pos, entry.title(), gloss)

    pywikibot.output(u'Done %s' % pageToProcess)
if __name__ == "__main__":
    # Script entry point: pywikibot.stopme() runs whether main() returns
    # normally or raises.
    try:
        main()
    finally:
        pywikibot.stopme()