from mesa import Agent, Model
from mesa.time import RandomActivation


class Urn:
    def __init__(self, options, balls=None):
        self.options = options
        self.verbose = False  # subclasses may enable verbose logging
        if balls is not None:
            self.balls = balls
        else:
            self.balls = {option: 1.0 for option in self.options}

    def get_filtered_urn(self, filter):
        '''Filters the urn's options by prefix and normalizes the weights.'''
        filtered_options = [k for k in self.balls.keys() if k.startswith(filter)]
        assert len(filtered_options) > 0, f"no options found for filter={filter} on {self.balls}"
        filtered_balls = {opt: self.balls[opt] for opt in filtered_options}
        total_balls = sum(filtered_balls.values())
        assert total_balls > 0.0, f"total weights is {total_balls} after filter={filter} on {self.balls}"
        filtered_probs = {opt: self.balls[opt] / total_balls for opt in filtered_options}
        return filtered_probs

    def choose_option(self, filter, model):
        '''Filters the urn by an option prefix (the state for a sender, the signal for a receiver).

        In the literature agents keep multiple urns so they can learn conditional
        probabilities for different contexts:
        - senders need one urn per state, and
        - receivers need one urn per signal.
        Here a simpler representation implements the multiple urns as a single matrix:
        all (prefix, choice) pairs live in one urn, and filtering by the prefix
        recovers the conditional probabilities for that context.
        '''
        if self.verbose:
            print(f'choose_option({filter=})')
        urn = self.get_filtered_urn(filter)
        return model.random.choices(list(urn.keys()), list(urn.values()))

    def update_weights(self, option, reward):
        old_balls = self.balls[option]
        self.balls[option] += reward
        if self.verbose:
            print(f"Updated weight for option {option}: {old_balls} -> {self.balls[option]}")


class HerrnsteinRL(Urn):
    '''Herrnstein matching law with learning rate.'''

    def __init__(self, options, learning_rate=1.0, verbose=False, name='Herrnstein matching law'):
        '''Herrnstein matching law with learning rate.

        Parameters:
            options: list of options
            learning_rate: float, default 1.0 (at 1.0 it behaves like the parent urn model)
            verbose: bool, default False
            name: str, the rule name, default 'Herrnstein matching law'
        '''
        super().__init__(options)
        self.verbose = verbose
        self.name = name
        self.learning_rate = learning_rate
        self.options = options
        if self.verbose:
            print(f'LearningRule.__init__(Options: {options})')

    def update_weights(self, option, reward):
        '''Scales the update by the learning rate.'''
        old_balls = self.balls[option]
        self.balls[option] += self.learning_rate * reward
        if self.verbose:
            print(f"Updated weight for option {option}: {old_balls} -> {self.balls[option]}")


class LewisAgent(Agent):
    def __init__(self, unique_id, model, game, role, verbose=False):
        '''Agent for the Lewis signaling game.

        Parameters:
            unique_id: int, unique identifier
            model: SignalingGame, the model
            game: int, the game number
            role: str, the role of the agent
            verbose: bool, default False
        '''
        super().__init__(unique_id, model)
        self.role = role
        self.verbose = verbose
        self.message = None
        self.action = None
        self.game = game
        self.current_state = None
        if role == "sender":
            self.urn = HerrnsteinRL(model.states_signals, learning_rate=1.0, verbose=verbose, name='state_signal_weights')
        elif role == "receiver":
            self.urn = HerrnsteinRL(model.signals_actions, learning_rate=1.0, verbose=verbose, name='signal_action_weights')
        else:
            # nature also gets an urn for choosing states:
            # a simple modification of the urn class could support basic distributions,
            # and the urns and their weights could be visualized with a simple schematic
            self.urn = HerrnsteinRL(model.states, learning_rate=0.0, verbose=verbose, name='state_weights')
        self.messages = []
        self.actions = []
        self.reward = 0

    def step(self):
        self.messages = []
        self.actions = []

    def gen_state(self):
        if self.role == "nature":
            # self.current_state = self.model.random.choice(self.model.states)
            # use the urn to choose the state
            self.current_state = self.urn.choose_option(filter='', model=self.model)[0]
            if self.verbose:
                print(f"Nature {self.unique_id} set state {self.current_state}")

    @property
    def state(self):
        if self.role == "nature":
            return self.current_state

    def choose_signal(self, state):
        if self.role == "sender":
            self.option = self.urn.choose_option(filter=state, model=self.model)
            self.signal = self.option[0].split('_')[1]
            if True:  # self.verbose:
                print(f"Sender {self.unique_id} sends signal: {self.signal}")
            return self.signal

    def send_signal(self, state, receiver):
        if self.role == "sender":
            assert type(state) == str, "state must be a string"
            assert len(state) > 0, "state must be a non-empty string"
            assert receiver is not None, "receiver must be a valid agent"
            assert state in self.model.states, f"{state=} must be in {self.model.states}"
            signal = self.choose_signal(state)
            receiver.messages.append(signal)
            if self.verbose:
                print(f"Sender {self.unique_id} sends signal: {signal}")

    def fuse_actions(self, actions):
        self.action = 0
        if self.role == "receiver":
            if len(actions) == 1:
                self.action = actions[0]
            else:
                for i in range(len(actions)):
                    self.action += int(actions[i]) * (2 ** i)
        return self.action

    def decode_message(self, signal):
        if self.role == "receiver":
            message = self.urn.choose_option(filter=signal, model=self.model)
            if self.verbose:
                print(f"Receiver {self.unique_id} received signal: {signal}")
            return message

    def set_action(self):
        '''Receiver decodes each message then fuses them into one action.'''
        if self.role == "receiver":
            for signal in self.messages:
                assert type(signal) == str, f"{signal=} must be a string"
                self.actions.append(self.decode_message(signal))
            action = self.fuse_actions(self.actions)
            if self.verbose:
                print(f"Receiver {self.unique_id} decided on action: {action}")

    def set_reward(self, reward):
        if self.role != "nature":
            self.reward = reward
            if self.verbose:
                print(f"agent {self.unique_id} received reward: {self.reward}")

    def calc_reward(self, state):
        '''Only the receiver calculates the reward.'''
        if self.role == "receiver":
            action = self.action
            reward = 1.0 if action == state else 0.0
            # stores the reward on the model, while step() below reads it back from the
            # receiver agent -- one of the bugs listed in the refactoring ideas
            self.model.reward = reward


class SignalingGame(Model):
    def __init__(self, game_count=2, senders_count=1, receivers_count=1, state_count=3, verbose=False):
        super().__init__()
        self.verbose = verbose
        self.schedule = RandomActivation(self)
        # states, signals, and actions
        self.states = [f'{i}' for i in range(state_count)]
        self.signals = [chr(65 + i) for i in range(state_count)]
        self.actions = [f'{i}' for i in range(state_count)]
        # urn options for sender and receiver
        self.states_signals = [f'{state}_{signal}' for state in self.states for signal in self.signals]
        self.signals_actions = [f'{signal}_{action}' for signal in self.signals for action in self.actions]
        self.current_state = None
        self.games = []
        self.uid = 0
        self.senders_count = senders_count
        self.receivers_count = receivers_count
        for i in range(game_count):
            game = {'senders': [], 'receivers': [], 'nature': None}
            nature = LewisAgent(self.uid, self, game=i, role="nature")
            game['nature'] = nature
            self.schedule.add(nature)
            self.uid += 1
            for j in range(senders_count):
                sender = LewisAgent(self.uid, self, game=i, role="sender")
                game['senders'].append(sender)
                self.schedule.add(sender)
                self.uid += 1
            for j in range(receivers_count):
                receiver = LewisAgent(self.uid, self, game=i, role="receiver")
                game['receivers'].append(receiver)
                self.schedule.add(receiver)
                self.uid += 1
            self.games.append(game)

    def step(self):
        for agent in self.schedule.agents:
            agent.step()
            if agent.role == 'nature':
                agent.gen_state()
        for agent in self.schedule.agents:
            if agent.role == 'sender':
                state = self.games[agent.game]['nature'].current_state
                for receiver in self.games[agent.game]['receivers']:
                    agent.send_signal(state, receiver)
        for agent in self.schedule.agents:
            if agent.role == 'receiver':
                agent.set_action()
                state = self.games[agent.game]['nature'].current_state
                agent.calc_reward(state=state)
        for agent in self.schedule.agents:
            reward = self.games[agent.game]['receivers'][0].reward
            agent.set_reward(reward)
        for i, game in enumerate(self.games):
            print(f'Game {i}, expected_rewards={self.expected_rewards(game)}')

    def expected_rewards(self, game):
        # placeholder value; see the refactoring ideas below
        return 0.25


# Running the model
state_count = 2  # Number of states, signals, and actions
steps = 10

model = SignalingGame(senders_count=1, receivers_count=1, state_count=state_count, game_count=3, verbose=True)
for i in range(steps):
    print(f"--- Step {i+1} ---")
    model.step()
--- Step 1 ---
Sender 1 sends signal: A
Sender 4 sends signal: A
Sender 7 sends signal: B
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 2 ---
Sender 1 sends signal: A
Sender 4 sends signal: A
Sender 7 sends signal: B
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 3 ---
Sender 1 sends signal: A
Sender 4 sends signal: B
Sender 7 sends signal: A
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 4 ---
Sender 1 sends signal: A
Sender 4 sends signal: A
Sender 7 sends signal: A
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 5 ---
Sender 1 sends signal: B
Sender 4 sends signal: A
Sender 7 sends signal: A
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 6 ---
Sender 1 sends signal: A
Sender 4 sends signal: B
Sender 7 sends signal: B
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 7 ---
Sender 1 sends signal: A
Sender 4 sends signal: B
Sender 7 sends signal: A
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 8 ---
Sender 1 sends signal: B
Sender 4 sends signal: A
Sender 7 sends signal: A
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 9 ---
Sender 1 sends signal: B
Sender 4 sends signal: B
Sender 7 sends signal: A
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 10 ---
Sender 1 sends signal: A
Sender 4 sends signal: A
Sender 7 sends signal: B
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
Some refactoring ideas:
- in the Urn class, add support for a matrix-based representation of the weights (see the first sketch below)
- fix the remaining bugs (the reward plumbing, the unused weight updates) and remove unused attributes from the agents
- replace the placeholder expected_rewards for a game with code that actually calculates the expected reward (see the second sketch below)
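For the matrix representation, here is a minimal sketch assuming NumPy; MatrixUrn is a hypothetical name and is not used by the model above. Rows index the prefixes (states for the sender, signals for the receiver), columns index the choices, and each cell holds a ball count, so normalizing one row replaces the string-prefix filtering done by get_filtered_urn.

import numpy as np

class MatrixUrn:
    '''Hypothetical matrix-backed urn: rows are prefixes, columns are choices.'''

    def __init__(self, prefixes, choices, learning_rate=1.0):
        self.prefixes = list(prefixes)
        self.choices = list(choices)
        self.learning_rate = learning_rate
        # start with one ball per (prefix, choice) pair, as in the Urn class above
        self.weights = np.ones((len(self.prefixes), len(self.choices)))

    def probabilities(self, prefix):
        # normalize one row to get the conditional distribution for that prefix
        row = self.weights[self.prefixes.index(prefix)]
        return row / row.sum()

    def choose_option(self, prefix, rng):
        # sample a choice in proportion to its weight, like Urn.choose_option
        # (rng would be model.random, a random.Random instance)
        return rng.choices(self.choices, weights=self.probabilities(prefix))[0]

    def update_weights(self, prefix, choice, reward):
        # matching-law style update on a single cell, scaled by the learning rate
        i = self.prefixes.index(prefix)
        j = self.choices.index(choice)
        self.weights[i, j] += self.learning_rate * reward

One payoff of this layout is that the whole weight matrix can be inspected or plotted at once, which makes visualizing the urns straightforward.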
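For the expected-rewards item, here is a sketch of one way to compute it, under the assumptions that states are drawn uniformly, each game has exactly one sender and one receiver, and the reward is 1 exactly when the decoded action equals the state. It reads the choice probabilities off each agent's filtered urn, i.e. it computes E[R] = (1/|S|) * sum over states s, signals m of P(m|s) * P(a=s|m), rather than sampling.

def expected_rewards(model, game):
    '''Expected reward for one game, assuming equiprobable states and a single
    sender/receiver pair, with reward 1 only when the action matches the state.'''
    sender = game['senders'][0]
    receiver = game['receivers'][0]
    total = 0.0
    for state in model.states:
        # P(signal | state) from the sender's urn
        signal_probs = sender.urn.get_filtered_urn(state)
        for option, p_signal in signal_probs.items():
            signal = option.split('_')[1]
            # P(action | signal) from the receiver's urn; only action == state pays off
            action_probs = receiver.urn.get_filtered_urn(signal)
            total += p_signal * action_probs.get(f'{signal}_{state}', 0.0)
    return total / len(model.states)

Written as a free function here, it could be dropped into SignalingGame as a method by renaming model to self. With two states and uniform urns it starts at 0.5 and should climb toward 1.0 once the weight updates are wired into the step loop.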