from mesa import Agent, Model
from mesa.time import RandomActivation


class Urn:
    def __init__(self, options, balls=None):
        self.options = options
        if balls is not None:
            self.balls = balls
        else:
            self.balls = {option: 1.0 for option in self.options}

    def get_filtered_urn(self, filter):
        '''Filters the urn's options by prefix and normalizes the weights.'''
        filtered_options = [k for k in self.balls.keys() if k.startswith(filter)]
        assert len(filtered_options) > 0, f"no options found for filter={filter} on {self.balls}"
        filtered_balls = {opt: self.balls[opt] for opt in filtered_options}
        total_balls = sum(filtered_balls.values())
        assert total_balls > 0.0, f"total weight is {total_balls} after filter={filter} on {self.balls}"
        filtered_probs = {opt: self.balls[opt] / total_balls for opt in filtered_options}
        return filtered_probs

    def choose_option(self, filter, model):
        '''Filters the urn by an option prefix (the state for a sender, the signal for a receiver).

        In the literature agents keep multiple urns to learn conditional
        probabilities for different contexts:
        - a sender needs one urn per state, and
        - a receiver needs one urn per signal.

        I chose a simpler representation that implements the multiple urns as a
        single table keyed by '<prefix>_<option>'. Filtering the urn by a prefix
        recovers the conditional weights, so a single urn can learn the
        conditional probability of each signal given a state (or of each action
        given a signal).
        '''
        if self.verbose:
            print(f'choose_option({filter=})')
        urn = self.get_filtered_urn(filter)
        return model.random.choices(list(urn.keys()), list(urn.values()))

    def update_weights(self, option, reward):
        old_balls = self.balls[option]
        self.balls[option] += reward
        if self.verbose:
            print(f"Updated weight for option {option}: {old_balls} -> {self.balls[option]}")


class HerrnsteinRL(Urn):
    '''Herrnstein matching law with a learning rate.'''

    def __init__(self, options, learning_rate=1.0, verbose=False, name='Herrnstein matching law'):
        '''Herrnstein matching law with a learning rate.

        Parameters:
            options: list of options
            learning_rate: float, default 1.0 (behaves like the parent urn model)
            verbose: bool, default False
            name: str, the rule name, 'Herrnstein matching law'
        '''
        super().__init__(options)
        self.verbose = verbose
        self.name = name
        self.learning_rate = learning_rate
        self.options = options
        if self.verbose:
            print(f'LearningRule.__init__(Options: {options})')

    def update_weights(self, option, reward):
        '''Scales the reward by the learning rate before updating the weight.'''
        old_balls = self.balls[option]
        self.balls[option] += self.learning_rate * reward
        if self.verbose:
            print(f"Updated weight for option {option}: {old_balls} -> {self.balls[option]}")


class LewisAgent(Agent):
    def __init__(self, unique_id, model, game, role, verbose=False):
        '''Agent for the Lewis signaling game.

        Parameters:
            unique_id: int, unique identifier
            model: SignalingGame, the model
            game: int, the game number
            role: str, the role of the agent
            verbose: bool, default False
        '''
        super().__init__(unique_id, model)
        self.role = role
        self.verbose = verbose
        self.message = None
        self.action = None
        self.game = game
        self.current_state = None
        if role == "sender":
            self.urn = HerrnsteinRL(model.states_signals, learning_rate=1.0, verbose=verbose, name='state_signal_weights')
        elif role == "receiver":
            self.urn = HerrnsteinRL(model.signals_actions, learning_rate=1.0, verbose=verbose, name='signal_action_weights')
        else:
            # Nature also gets an urn for choosing states. A simple modification of
            # the Urn class would support other state distributions, and the urns
            # and their weights could then be visualized with a simple schematic.
            self.urn = HerrnsteinRL(model.states, learning_rate=0.0, verbose=verbose, name='state_weights')
        self.messages = []
        self.actions = []
        self.reward = 0

    def step(self):
        self.messages = []
        self.actions = []

    def gen_state(self):
        if self.role == "nature":
            # use the urn to choose the state
            # (instead of self.model.random.choice(self.model.states))
            self.current_state = self.urn.choose_option(filter='', model=self.model)[0]
            if self.verbose:
                print(f"Nature {self.unique_id} set state {self.current_state}")

    @property
    def state(self):
        if self.role == "nature":
            return self.current_state

    def choose_signal(self, state):
        if self.role == "sender":
            self.option = self.urn.choose_option(filter=state, model=self.model)
            self.signal = self.option[0].split('_')[1]
            if True:  # self.verbose: (always print for now)
                print(f"Sender {self.unique_id} sends signal: {self.signal}")
            return self.signal

    def send_signal(self, state, receiver):
        if self.role == "sender":
            assert type(state) == str, "state must be a string"
            assert len(state) > 0, "state must be a non-empty string"
            assert receiver is not None, "receiver must be a valid agent"
            assert state in self.model.states, f"{state=} must be in {self.model.states}"
            signal = self.choose_signal(state)
            receiver.messages.append(signal)
            if self.verbose:
                print(f"Sender {self.unique_id} sends signal: {signal}")

    def fuse_actions(self, actions):
        self.action = 0
        if self.role == "receiver":
            if len(actions) == 1:
                self.action = actions[0]
            else:
                for i in range(len(actions)):
                    self.action += int(actions[i]) * (2 ** i)
        return self.action

    def decode_message(self, signal):
        if self.role == "receiver":
            message = self.urn.choose_option(filter=signal, model=self.model)
            if self.verbose:
                print(f"Receiver {self.unique_id} received signal: {signal}")
            return message

    def set_action(self):
        '''The receiver decodes each message and then fuses them into one action.'''
        if self.role == "receiver":
            for signal in self.messages:
                assert type(signal) == str, f"{signal=} must be a string"
                self.actions.append(self.decode_message(signal))
            action = self.fuse_actions(self.actions)
            if self.verbose:
                print(f"Receiver {self.unique_id} decided on action: {action}")

    def set_reward(self, reward):
        if self.role != "nature":
            self.reward = reward
            if self.verbose:
                print(f"agent {self.unique_id} received reward: {self.reward}")

    def calc_reward(self, state):
        '''Only the receiver calculates the reward.'''
        if self.role == "receiver":
            action = self.action
            reward = 1.0 if action == state else 0.0
            # NOTE: the reward is stored on the model, but set_reward() reads the
            # receiver's own reward attribute; wiring these together is one of the
            # remaining fixes.
            self.model.reward = reward


class SignalingGame(Model):
    def __init__(self, game_count=2, senders_count=1, receivers_count=1, state_count=3, verbose=False):
        super().__init__()
        self.verbose = verbose
        self.schedule = RandomActivation(self)
        # states, signals, and actions
        self.states = [f'{i}' for i in range(state_count)]
        self.signals = [chr(65 + i) for i in range(state_count)]
        self.actions = [f'{i}' for i in range(state_count)]
        # urn options for sender and receiver
        self.states_signals = [f'{state}_{signal}' for state in self.states for signal in self.signals]
        self.signals_actions = [f'{signal}_{action}' for signal in self.signals for action in self.actions]
        self.current_state = None
        self.games = []
        self.uid = 0
        self.senders_count = senders_count
        self.receivers_count = receivers_count
        for i in range(game_count):
            game = {'senders': [], 'receivers': [], 'nature': None}
            nature = LewisAgent(self.uid, self, game=i, role="nature")
            game['nature'] = nature
            self.schedule.add(nature)
            self.uid += 1
            for j in range(senders_count):
                sender = LewisAgent(self.uid, self, game=i, role="sender")
                game['senders'].append(sender)
                self.schedule.add(sender)
                self.uid += 1
            for j in range(receivers_count):
                receiver = LewisAgent(self.uid, self, game=i, role="receiver")
                game['receivers'].append(receiver)
                self.schedule.add(receiver)
                self.uid += 1
            self.games.append(game)

    def step(self):
        # nature sets a state for each game
        for agent in self.schedule.agents:
            agent.step()
            if agent.role == 'nature':
                agent.gen_state()
        # senders observe the state and signal to the receivers in their game
        for agent in self.schedule.agents:
            if agent.role == 'sender':
                state = self.games[agent.game]['nature'].current_state
                for receiver in self.games[agent.game]['receivers']:
                    agent.send_signal(state, receiver)
        # receivers decode their messages and act
        for agent in self.schedule.agents:
            if agent.role == 'receiver':
                agent.set_action()
                state = self.games[agent.game]['nature'].current_state
                agent.calc_reward(state=state)
        # distribute the reward
        for agent in self.schedule.agents:
            reward = self.games[agent.game]['receivers'][0].reward
            agent.set_reward(reward)
        for i, game in enumerate(self.games):
            print(f'Game {i}, expected_rewards={self.expected_rewards(game)}')

    def expected_rewards(self, game):
        # placeholder; see the refactoring notes below
        return 0.25


# Running the model
state_count = 2  # Number of states, signals, and actions
steps = 10

model = SignalingGame(senders_count=1, receivers_count=1, state_count=state_count, game_count=3, verbose=True)
for i in range(steps):
    print(f"--- Step {i+1} ---")
    model.step()
--- Step 1 ---
Sender 1 sends signal: B
Sender 4 sends signal: A
Sender 7 sends signal: A
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 2 ---
Sender 1 sends signal: A
Sender 4 sends signal: B
Sender 7 sends signal: B
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 3 ---
Sender 1 sends signal: A
Sender 4 sends signal: B
Sender 7 sends signal: B
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 4 ---
Sender 1 sends signal: B
Sender 4 sends signal: B
Sender 7 sends signal: B
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 5 ---
Sender 1 sends signal: B
Sender 4 sends signal: B
Sender 7 sends signal: A
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 6 ---
Sender 1 sends signal: B
Sender 4 sends signal: A
Sender 7 sends signal: A
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 7 ---
Sender 1 sends signal: A
Sender 4 sends signal: A
Sender 7 sends signal: A
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 8 ---
Sender 1 sends signal: A
Sender 4 sends signal: A
Sender 7 sends signal: B
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 9 ---
Sender 1 sends signal: B
Sender 4 sends signal: A
Sender 7 sends signal: B
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
--- Step 10 ---
Sender 1 sends signal: A
Sender 4 sends signal: A
Sender 7 sends signal: A
Game 0, expected_rewards=0.25
Game 1, expected_rewards=0.25
Game 2, expected_rewards=0.25
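To check whether sender and receiver are converging on a signaling system, one can dump the urn weights after the run. The following is a minimal sketch using only attributes defined above (model.schedule.agents, agent.role, agent.urn.name, agent.urn.balls). Note that as written, step() never calls update_weights, so every weight will still be at its initial value of 1.0, which is one of the bugs noted below.

# Sketch (not part of the model): inspect the urn weights after the run.
for agent in model.schedule.agents:
    if agent.role in ('sender', 'receiver'):
        print(f"agent {agent.unique_id} ({agent.role}) urn {agent.urn.name}:")
        for option, weight in sorted(agent.urn.balls.items()):
            print(f"  {option}: {weight:.2f}")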
Some refactoring ideas:
- In the Urn class, add support for a matrix-based representation of the weights.
- Fix the remaining bugs and remove unused attributes from the agent.
- Replace the stubbed expected_rewards for a game with code that actually calculates the expected reward, as sketched below.
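Here is a minimal sketch of that calculation, assuming one sender and one receiver per game and that nature draws states uniformly (which holds above, since nature's urn starts with equal weights and its learning rate is 0). The expected reward is the probability that the receiver's action matches the state, averaged over states: the sum over states s and signals m of P(s) * P(m | s) * P(a = s | m), with the conditional probabilities read off the urns via get_filtered_urn. The function name expected_reward is hypothetical; it is written as a standalone function taking the model so it can be tried without editing the class.

# Hypothetical replacement for SignalingGame.expected_rewards.
def expected_reward(model, game):
    '''Expected reward for a game with one sender and one receiver,
    assuming nature draws states uniformly at random.'''
    sender = game['senders'][0]
    receiver = game['receivers'][0]
    total = 0.0
    for state in model.states:
        # P(signal | state) from the sender's urn
        for s_option, p_signal in sender.urn.get_filtered_urn(state).items():
            signal = s_option.split('_')[1]
            # P(action | signal) from the receiver's urn
            for r_option, p_action in receiver.urn.get_filtered_urn(signal).items():
                action = r_option.split('_')[1]
                if action == state:
                    total += p_signal * p_action
    # average over the uniformly distributed states
    return total / len(model.states)

print(expected_reward(model, model.games[0]))

With the initial uniform weights and two states this evaluates to 0.5, the payoff of random play, and it should rise toward 1.0 once the urn updates are wired into step().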