# User:Yinweichen/mplist.py

# -*- coding: utf-8 -*-

# Winston Yin 2015-12-23
# Takes in txt file of discovery circumstances, downloaded from
# MPC website, and site and discoverer translation libraries.
# Mode 0: Collects new terms not seen in the translation libs.
# Mode 1: Takes in fully compiled translation libs and generates
# dict.txt as input for pagefromfile.py

import re

# One row of the discovery-circumstances list
class planet:
	"""A single minor planet parsed from one fixed-width text row.

	Attributes:
	n     -- number taken from columns 1-6, always kept as an int
	name, desig, date, site, disc -- stripped slices of the row
	site2, disc2 -- wikified site / discoverer text; start out empty
	                 and are assigned by mplist.wikify()
	"""

	def __init__(self, row):
		"""Slice `row` into its columns. Raises ValueError when columns
		1-6 do not hold an integer (e.g. a blank line)."""
		self.n = int(row[1:7]) # Always use as integer
		self.name = row[8:29].strip()
		self.desig = row[29:41].strip()
		self.date = row[41:51].strip()
		self.site = row[53:78].strip()
		self.disc = row[78:].strip()
		# Both translated fields get a default so that a row can be
		# written out even before any translation has been assigned.
		self.site2 = ''
		self.disc2 = ''

	def wikify(self):
		"""Rewrite name, desig and date in place as wikitext.

		Meant to be called once per planet: the fields are overwritten,
		so the raw values are gone afterwards. self.n stays an integer.
		"""
		number = str(self.n)
		if self.name == '':
			self.name = '小行星' + number
		else:
			self.name = '[[小行星' + number + ']]' + self.name
		# Only designations ending in a digit are wrapped in {{mp}}.
		# The emptiness check avoids an IndexError on a blank column.
		if self.desig and re.match(r'\d', self.desig[-1]):
			self.desig = '{{mp|' + self.desig[0:7] + '|' + self.desig[7:] + '}}'
		# int() drops the leading zero of month and day.
		month = str(int(self.date[5:7]))
		day = str(int(self.date[8:10]))
		self.date = self.date[0:4] + '年' + month + '月' + day + '日'

# Keeps the files, the parsed planets and both translation tables together
class mplist:
	"""Working set for one run: open files, planets and translation libs.

	sitelib / disclib are flat lists of stripped lines. translate() reads
	the line right after a term as its translation for a first occurrence
	and the second line after it as the translation for a repeated
	occurrence; collect() appends each new term followed by two empty
	lines.
	"""

	def __init__(self):
		"""Open the four files in the working directory and load them.

		NOTE: dict.txt is opened with 'w', so it is truncated as soon as
		an mplist is constructed, whichever mode is run afterwards.
		"""
		self.infile = open('minorlist.txt')
		self.outfile = open('dict.txt', 'w')
		# 'r+' rather than 'r': collect() appends new terms to these files
		# after they have been read to the end.
		self.sitefile = open('sites.txt', 'r+')
		self.discfile = open('discoverers.txt', 'r+')
		try:
			self.planets = make_planets(load_table(self.infile))
		finally:
			# The input list is only read once, so release it right away.
			self.infile.close()
		self.sitelib = load_table(self.sitefile)
		self.disclib = load_table(self.discfile)

	# Collects new terms and appends them to files
	def collect(self):
		"""Append every site / discoverer not yet in its library to the
		in-memory list and to the library file, then close both files and
		print how many terms were added."""
		try:
			site_count = self._append_new_terms(
				[p.site for p in self.planets], self.sitelib, self.sitefile)
			disc_count = self._append_new_terms(
				[p.disc for p in self.planets], self.disclib, self.discfile)
		finally:
			self.sitefile.close()
			self.discfile.close()
		print(str(site_count) + ' sites added.')
		print(str(disc_count) + ' discoverers added.')

	# Adds unseen terms to one library and its file; returns how many
	@staticmethod
	def _append_new_terms(terms, lib, outfile):
		# A set gives constant-time membership tests; it holds every line
		# of the library, exactly what a test against the list would see.
		known = set(lib)
		fresh = []
		for term in terms:
			if term not in known:
				known.add(term)
				lib.append(term)
				fresh.append(term)
		# Each term is followed by two empty lines for its translations.
		outfile.write(''.join([term + '\n\n\n' for term in fresh]))
		return len(fresh)

	# Wikify every row
	def wikify(self):
		"""Wikify each planet and assign its translated site2 / disc2."""
		for i, p in enumerate(self.planets):
			p.wikify()
			p.site2 = self.translate(i, 'site')
			p.disc2 = self.translate(i, 'disc')

	# Find string in library, translate it and wikify it
	def translate(self, i, mode):
		"""Return the wikitext for the site or discoverer of planet i.

		mode is 'site' or 'disc'; anything else raises ValueError.
		A term that differs from the previous planet's term (or belongs to
		the first planet) is wrapped in [[ ]]; a term equal to the previous
		one is returned without brackets. When the library holds no
		translation (empty line, missing term or truncated library) the
		original term is used.
		"""
		if mode == 'site':
			lib = self.sitelib
			string = self.planets[i].site
			prev_string = self.planets[i-1].site if i != 0 else ''
		elif mode == 'disc':
			lib = self.disclib
			string = self.planets[i].disc
			prev_string = self.planets[i-1].disc if i != 0 else ''
		else:
			raise ValueError("mode must be 'site' or 'disc', got " + repr(mode))
		repeated = i != 0 and string == prev_string
		# First occurrences use the line after the term, repeats the
		# second line after it.
		translation = self._lookup(lib, string, 2 if repeated else 1)
		text = translation if translation != '' else string
		if repeated:
			return text
		return '[[' + text + ']]'

	# Returns the line `offset` lines after the first match of term, or ''
	@staticmethod
	def _lookup(lib, term, offset):
		try:
			pos = lib.index(term) + offset
		except ValueError:
			# Term not in the library: treat as "no translation".
			return ''
		return lib[pos] if pos < len(lib) else ''

	# Generate dict.txt
	def write_dict(self):
		"""Write all pages to the output file and close it.

		A planet whose number ends in ...001 starts a thousands page and
		one ending in ...01 starts a hundreds page; a number divisible by
		100 ends the hundreds page. Expects wikify() to have run already.
		"""
		thousands = 0
		hundreds = 0
		# Pieces are joined once at the end instead of growing a string.
		parts = []
		for p in self.planets:
			# Thousands pages
			if p.n % 1000 == 1:
				parts.append(self._thousands_page(p.n))
				print(p.n) # Progress
				thousands += 1
			# Hundreds pages
			if p.n % 100 == 1:
				parts.append('xxxx----小行星列表/' + str(p.n) + '-' + str(p.n+99)
					+ '----<noinclude>{{小行星列表/Header}}</noinclude>\n')
			# Each row
			parts.append('|-\n| ' + p.name + ' || ' + p.desig + ' || ' + p.date
				+ ' || ' + p.site2 + ' || ' + p.disc2 + '\n')
			if p.n % 100 == 0:
				parts.append('|-\n<noinclude>{{小行星列表/Helper|' + str(p.n//100-1)
					+ '}}\n\n</noinclude>yyyy\n\n\n')
				hundreds += 1
		try:
			self.outfile.write(''.join(parts))
		finally:
			self.outfile.close()
		print(str(thousands) + ' thousands pages and ' + str(hundreds) + ' hundreds pages generated.')

	# Builds the overview page that transcludes ten hundreds pages
	@staticmethod
	def _thousands_page(first):
		prefix = str(first//1000)
		chunks = ['xxxx----小行星列表/' + str(first) + '-' + str(first+999)
			+ '----{{TOC001|prefix=' + prefix + '}}\n\n{{小行星列表/Header|main=yes}}\n']
		for j in range(10):
			span = str(first+j*100) + '-' + str(first+j*100+99)
			chunks.append('! colspan="5" style="background-color:silver;text-align:center;" id="'
				+ str(j) + '01" | ' + span
				+ ' <small class="plainlinks"><nowiki>[</nowiki>[{{SERVER}}{{localurl:小行星列表/'
				+ span + '|action=edit}} 編輯]]</small>\n'
				+ '{{:小行星列表/' + span + '}}\n')
		chunks.append('|}\n\n{{小行星列表/helper1000|' + prefix + '}}yyyy\n\n\n')
		return ''.join(chunks)

# Keeps asking until the user enters a valid option (0/1)
def ask_mode(repeat = False):
	"""Prompt for the run mode and return it as the integer 0 or 1.

	repeat -- when true, start directly with the short retry prompt.

	input() returns a string on Python 3, so the answer is compared as
	text and converted afterwards; comparing the raw answer against the
	integers 0 and 1 never matches there. Invalid answers are asked again
	in a loop rather than by recursion, so there is no recursion limit.
	"""
	if repeat:
		prompt = 'Enter again. '
	else:
		prompt = 'Back up sites.txt and discoverers.txt first. Collecting terms (0), or reading as library (1)? '
	while True:
		# str() also covers an input() that already returned a number.
		answer = str(input(prompt)).strip()
		if answer in ('0', '1'):
			return int(answer)
		prompt = 'Enter again. '

# Read every line of a file into a list, without surrounding whitespace
def load_table(file):
	"""Return the lines of `file` as a list, each one stripped of leading
	and trailing whitespace. Blank lines are kept as empty strings."""
	return [raw.strip() for raw in file]

# Turn each text row into a planet object, keeping the order
def make_planets(table):
	"""Return a list with one planet per row of `table`."""
	return [planet(entry) for entry in table]

# Main sequence
def run():
	"""Ask for the mode, load all files, then run the chosen mode.

	Mode 0 collects unseen terms into the library files; mode 1 wikifies
	every row and writes dict.txt.
	"""
	option = ask_mode()
	newlist = mplist()
	if option == 0:
		newlist.collect()
	elif option == 1:
		newlist.wikify()
		newlist.write_dict()

# Only start when executed as a script, so that importing this module
# does not prompt the user or open (and truncate) any files.
if __name__ == '__main__':
	run()