In this note I'd like to create a vanilla implementation of a morphology and syntax that might be used as an inductive bias for an emergent language.
Morphology
Let’s define the following grammatical categories:
part of speech
open
closed
nouns
cases
tenses
aspects
moods
A baseline generative model for morphology
import random
import csv

random.seed(45)


# Start with a simple morphology and then add more complexity.
# TODO: check we don't overflow the phoneme space.
class base_morphology:
    """A vanilla generative model of morphology for an emergent language.

    Builds a CV (consonant + vowel) phoneme inventory, deterministically
    assigns affix markers to grammatical categories (part of speech, case,
    number, person, tense, mood, aspect), and generates random stems for a
    small noun lexicon.
    """

    def __init__(self, vowels=None, consonants=None,
                 parts_of_speech_closed=None, parts_of_speech_open=None,
                 declensions=None, nouns=None):
        """Configure the phoneme inventory and grammatical categories.

        Any argument left as None (or empty) falls back to the built-in
        default inventory below.
        """
        # --- phonemes ---
        if not vowels:
            self.vowels = ['a', 'e', 'i', 'o', 'u',
                           'aa', 'ee', 'ii', 'oo', 'uu',
                           'ai', 'au', 'ei', 'ou', 'ia', 'ua']
        else:
            self.vowels = vowels
        if not consonants:
            # BUG FIX: the original list had missing commas ('kw''l' and
            # 'tsh''v'), which Python silently concatenated into the bogus
            # consonants 'kwl' and 'tshv'.
            self.consonants = ['b', 'c', 'cs', 'ch', 'd', 'dh', 'dzh', 'f',
                               'g', 'gh', 'h', 'hw', 'ny', 'j', 'k', 'kw',
                               'l', 'ld', 'lh', 'm', 'mb', 'n', 'nc', 'nd',
                               'ng', 'ngw', 'nqu', 'nqt', 'nt', 'p', 'q',
                               'r', 'rd', 'rh', 's', 'sh', 't', 'tsh', 'v',
                               'w', 'x', 'y', 'z']
        else:
            self.consonants = consonants
        # --- parts of speech ---
        if not parts_of_speech_closed:
            self.parts_of_speech_closed = ['pronoun', 'article', 'preposition',
                                           'conjunction', 'numeral']
        else:
            self.parts_of_speech_closed = parts_of_speech_closed
        if not parts_of_speech_open:
            self.parts_of_speech_open = ['noun', 'verb', 'adjective', 'adverb']
        else:
            self.parts_of_speech_open = parts_of_speech_open
        # --- cases (Hungarian-style inventory) ---
        if not declensions:
            self.declensions = [
                'nominative',    # subject
                'accusative',    # direct object
                'dative',        # indirect object
                'instrumental',  # with, by means of
                'causal',        # for, for the purpose of
                'translative',   # into
                'terminative',   # as far as, up to
                'essive',        # as
                'inessive',      # in
                'superessive',   # on
                'adessive',      # by, at
                'illative',      # into
                'sublative',     # onto
                'allative',      # to
                'elative',       # out of
                'delative',      # off, about
                'ablative',      # from, away from
                'genitive',      # of, 's
                'locative',      # location
                'vocative',      # object being addressed
                'partitive',     # partialness
                'abessive',      # without
                'comitative',    # with
            ]
        else:
            self.declensions = declensions
        # --- nouns ---
        if not nouns:
            self.nouns = ['monkey', 'falcon', 'puma', 'conda', 'tilapia',
                          'banana', 'kiwi', 'coconut', 'pear', 'river',
                          'mountain', 'ocean', 'lake', 'forest', 'clearing',
                          'valley', 'one', 'two', 'many']
        else:
            self.nouns = nouns
        # build all marker dictionaries up front
        self.gen_parts_of_speech_dict()
        self.gen_dec_dict()
        self.gen_noun_dict()
        self.gen_plurals_dict()
        self.gen_inf_markers_dict()
        self.gen_tense_dict()
        self.gen_mood_dict()
        self.gen_aspect_dict()

    def generate_rnd_phone(self):
        """Return a random CV phoneme (one consonant + one vowel)."""
        return random.choice(self.consonants) + random.choice(self.vowels)

    def generate_num_phoneme(self, consonant, vowel):
        """Return the CV phoneme at the given indices (wrapping around)."""
        c = self.consonants[consonant % len(self.consonants)]
        v = self.vowels[vowel % len(self.vowels)]
        return c + v

    def generate_rnd_stem(self, k=3):
        """Return a random stem made of k CV phonemes."""
        return ''.join(self.generate_rnd_phone() for _ in range(k))

    def _marker_dict(self, values):
        """Map affix markers to `values`; the first value is unmarked ("").

        Markers are drawn deterministically from the phoneme table, so every
        category dict shares the same marker sequence. NOTE: if the phoneme
        inventory is smaller than `values`, markers wrap around and collide,
        and later values overwrite earlier ones (same as the original
        dict-comprehension behavior).
        """
        markers = [""] + [self.generate_num_phoneme(i, 0)
                          for i in range(len(values) - 1)]
        return dict(zip(markers, values))

    def gen_parts_of_speech_dict(self):
        """Build the marker -> open part-of-speech dictionary."""
        # NOTE: the closed parts of speech are currently ignored.
        self.pos_dict = self._marker_dict(self.parts_of_speech_open)

    # The criterion for an ending to be a case (according to today's
    # generative linguistic grammars of Hungarian) is that a word with that
    # ending can be a compulsory argument of a verb. This difference is
    # usually unimportant for average learners of the language.
    def gen_dec_dict(self):
        """Build the marker -> declension (case) dictionary."""
        self.declenations_dict = self._marker_dict(self.declensions)

    def gen_plurals_dict(self):
        """Build the marker -> grammatical number dictionary."""
        self.numbers = ['singular', 'plural']  # TODO: make a parameter
        self.plu_markers_dict = self._marker_dict(self.numbers)

    def gen_inf_markers_dict(self):
        """Build the marker -> person/number inflection dictionary."""
        # TODO: make a parameter
        self.inflections = ['1ps', '2ps', '3ps', '1pp', '2pp', '3pp']
        self.inf_markers_dict = self._marker_dict(self.inflections)

    def gen_tense_dict(self):
        """Build the marker -> tense dictionary."""
        self.tenses = ['past', 'present', 'future']  # TODO: make a parameter
        self.tense_markers_dict = self._marker_dict(self.tenses)

    def gen_mood_dict(self):
        """Build the marker -> mood dictionary."""
        # TODO: make a parameter
        self.moods = ['indicative', 'subjunctive', 'imperative', 'conditional',
                      'optative', 'jussive', 'interrogative', 'exclamatory']
        self.mood_markers_dict = self._marker_dict(self.moods)

    def gen_aspect_dict(self):
        """Build the marker -> aspect dictionary."""
        # TODO: make a parameter
        self.aspects = ['perfective', 'imperfective', 'progressive',
                        'habitual', 'frequentative', 'iterative']
        self.aspects_dict = self._marker_dict(self.aspects)

    def gen_noun_dict(self):
        """Build the random-stem -> noun gloss dictionary.

        BUG FIX: the original re-hardcoded `self.nouns` here, clobbering
        whatever noun list was passed to __init__.
        """
        # 1. generate a stem for each noun; 2. map stems to glosses
        stems = [self.generate_rnd_stem(3) for _ in range(len(self.nouns))]
        self.nouns_dict = dict(zip(stems, self.nouns))

    def gen_lexicon(self):
        """Enumerate every inflected form and return {lexeme: features}.

        Nouns are crossed with case x number; verbs with mood x tense x
        person; any other part of speech gets the bare stem. The result is
        also stored on `self.lexicon`.

        BUG FIX: the original referenced the marker dictionaries without
        `self.` (a NameError at runtime) and never returned or stored the
        lexicon, so export_lemmas could not work.
        """
        lexicon = {}
        for stem, gloss in self.nouns_dict.items():
            print(f'\n\nlemma: {stem} = {gloss}')
            for pos_marker, pos in self.pos_dict.items():
                if pos == 'noun':
                    for dec_marker, dec in self.declenations_dict.items():
                        for plu_marker, plu in self.plu_markers_dict.items():
                            lexeme = f'{stem}\'{pos_marker}{dec_marker}{plu_marker}'
                            features = f'{gloss},{pos},{dec},{plu}'
                            lexicon[lexeme] = features
                            print(f'{lexeme} = {features}')
                elif pos == 'verb':
                    for mood_marker, mood in self.mood_markers_dict.items():
                        for tense_marker, tense in self.tense_markers_dict.items():
                            for inf_marker, inf in self.inf_markers_dict.items():
                                lexeme = (f'{stem}\'{pos_marker}{mood_marker}'
                                          f'{tense_marker}{inf_marker}')
                                features = f'{gloss},{pos},{mood},{tense},{inf}'
                                lexicon[lexeme] = features
                                print(f'{lexeme} = {features}')
                else:
                    lexeme = f'{stem}\'{pos_marker}'
                    features = f'{gloss},{pos}'
                    lexicon[lexeme] = features
                    print(f'{lexeme} = {features}')
        self.lexicon = lexicon
        return lexicon

    def export_lemmas(self, lexicon, filename='lexicon.csv'):
        """Write the lexicon to a CSV file with a lemma/features header.

        BUG FIX: the original ignored the `lexicon` argument and read
        `self.lexicon`, which was never assigned by the original code.
        """
        # newline='' per the csv module docs to avoid blank rows on Windows
        with open(filename, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['lemma', 'features'])
            for lemma, features in lexicon.items():
                writer.writerow([lemma, features])


if __name__ == '__main__':
    base = base_morphology(
        vowels=['a', 'e', 'i', 'o', 'u'],
        consonants=['b', 'c', 'd', 'f', 'g', 'h', 'j',
                    'k', 'l', 'm', 'n', 'p', 'q'],
        parts_of_speech_closed=['pronoun', 'article', 'preposition'],
        parts_of_speech_open=['noun', 'verb'],
        declensions=['nominative'],
        nouns=['monkey', 'falcon'],
    )
    print(f'{base.generate_rnd_phone()=}')
    print(f'{base.generate_num_phoneme(3, 2)}')
    print(f'{base.generate_rnd_stem(3)}')
    print(f'{base.pos_dict=}')
    print(f'{base.declenations_dict=}')
    print(f'{base.nouns_dict=}')
    print(f'{base.plu_markers_dict=}')
    print(f'{base.inf_markers_dict=}')
    print(f'{base.tense_markers_dict=}')
    print(f'{base.mood_markers_dict=}')
    print(f'{base.aspects_dict=}')
    # export_lemmas(lexicon)