import random
import datetime
# States of the shopper's Markov chain. A session starts in 'entry';
# 'sale' has no outgoing transitions and therefore ends the session.
STATES = ['entry', 'product_view', 'add_to_cart', 'check_in', 'sale']

# Define the transition probabilities between states.
# Each row maps a current state to {next_state: probability}; every
# non-empty row sums to 1.
TRANSITION_PROBABILITIES = {
    'entry': {'product_view': 0.5, 'check_in': 0.3, 'add_to_cart': 0.2},
    'product_view': {'add_to_cart': 0.3, 'check_in': 0.5, 'entry': 0.2},
    'add_to_cart': {'check_in': 0.4, 'product_view': 0.3, 'sale': 0.3},
    'check_in': {'product_view': 0.4, 'add_to_cart': 0.3, 'sale': 0.3},
    'sale': {},
}
def generate_events(user_count, start_date, transitions=None):
    """Simulate e-commerce click-stream events, one session per user.

    Every user walks a Markov chain that starts in the ``'entry'`` state and
    keeps transitioning until a ``'sale'`` is reached. One event is yielded
    per transition, so the starting ``'entry'`` state itself is not emitted;
    the first event of a session is the first state reached from it.

    Args:
        user_count: Number of users to simulate. User IDs run from 1 to
            ``user_count`` inclusive; a value below 1 yields nothing.
        start_date: ``datetime.datetime`` at which each user's session
            clock starts.
        transitions: Optional mapping ``state -> {next_state: weight}``
            describing the chain. Defaults to the module-level
            ``TRANSITION_PROBABILITIES``.

    Yields:
        dict: An event with the keys ``user_id``, ``session_id``, ``state``,
        ``time_stamp``, ``product`` and ``purchase_price``. ``product`` is
        ``None`` for states that are not product related and
        ``purchase_price`` is ``None`` for every state except ``'sale'``.
    """
    if transitions is None:
        transitions = TRANSITION_PROBABILITIES

    product_states = ('product_view', 'add_to_cart', 'sale')

    for user_id in range(1, user_count + 1):
        # Generate a session ID for the user
        session_id = str(user_id) + '_' + str(random.randint(100000, 999999))

        # Initialize the state and timestamp for the user's first event
        current_state = 'entry'
        current_time = start_date

        while True:
            # Generate the next state based on the current state and
            # transition probabilities
            outgoing = transitions[current_state]
            next_state = random.choices(
                list(outgoing), weights=list(outgoing.values())
            )[0]

            # Generate a timestamp for the next event (1 to 300 seconds later)
            time_diff = datetime.timedelta(seconds=random.randint(1, 300))
            next_time = current_time + time_diff

            # Generate a product ID for product-related events
            product = None
            if next_state in product_states:
                product = 'product_' + str(random.randint(1, 100))

            # If the next state is a sale, generate a purchase price
            purchase_price = None
            if next_state == 'sale':
                purchase_price = round(random.uniform(10, 100), 2)

            # Yield the event with the user ID, session ID, state, timestamp,
            # product, and purchase price
            yield {'user_id': user_id, 'session_id': session_id,
                   'state': next_state, 'time_stamp': next_time,
                   'product': product, 'purchase_price': purchase_price}

            # A sale ends the session. A state without outgoing transitions
            # also ends it, so a custom chain cannot crash on an empty row.
            if next_state == 'sale' or not transitions.get(next_state):
                break

            # Otherwise, update the current state and timestamp for the
            # next event
            current_state = next_state
            current_time = next_time
Goal:
Create Simulated data sets
steps:
- break down simulation into blocks
- simulate each block into a csv
- simulate an event stream
- simulate a graph
- put on s3
- put in deltalake
- process with spark
Block list
Simulated (fake) data is one of the most important tools in the Bayesian arsenal.
- it makes us think about the data side of the problem.
- it lets us find out about some of the constraints that should go into building the priors.
- it gives us a coherent ground truth to validate our models.
In this case I want to simulate an ecommerce store: the first two objects have to do with demand
- there are different types of seasonality [winter,summer,spring,fall,weekdays,weekend,holiday,none]
- there are different types of trend [up,down,steady,random]
- there are three relations between products [substitutes, complements, none]
- there are three relations between products and brands [inferior, superior, none]
- there is a list of brands
- there is a list of product categories.
- there is a list of products.
generate_brand(): generate a brand with fields: id, rv_quality ~ Normal(quality_mean, quality_variance), name
product_generator(brand, category): generate a product with fields: id, brand, description, quality_score drawn from the brand's distribution
for each product category:
    while there are fewer than three products in the category:
        for each brand:
            if there is a product with the brand: continue
            else if random Bernoulli(p=0.5) > 0.3333: generate a product
prompt
Can you write a Python generator that takes the parameters `user_count` (the number of users) and `start_date`, and simulates user events on an e-commerce site? The events are drawn from a Markov chain with the following states: [entry, product_view, add_to_cart, check_in, sale]. Each event has a time_stamp, user_id, session_id, and product. The event is one of these states; if the page is a sale there is a purchase price.
# Generate events for 10 users starting from January 1, 2023.
# generate_events is a generator, so nothing is simulated until the loop
# below starts consuming it.
events = generate_events(user_count=10, start_date=datetime.datetime(2023, 1, 1))

# Iterate through the events and print them
for event in events:
    print(event)
{'user_id': 1, 'session_id': '1_958738', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 0, 3), 'product': None, 'purchase_price': None}
{'user_id': 1, 'session_id': '1_958738', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 1, 28), 'product': 'product_2', 'purchase_price': None}
{'user_id': 1, 'session_id': '1_958738', 'state': 'sale', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 2, 9), 'product': 'product_13', 'purchase_price': 23.23}
{'user_id': 2, 'session_id': '2_596879', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 1, 4), 'product': 'product_73', 'purchase_price': None}
{'user_id': 2, 'session_id': '2_596879', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 2, 5), 'product': 'product_23', 'purchase_price': None}
{'user_id': 2, 'session_id': '2_596879', 'state': 'entry', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 6, 53), 'product': None, 'purchase_price': None}
{'user_id': 2, 'session_id': '2_596879', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 7, 31), 'product': 'product_79', 'purchase_price': None}
{'user_id': 2, 'session_id': '2_596879', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 9, 24), 'product': None, 'purchase_price': None}
{'user_id': 2, 'session_id': '2_596879', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 11, 38), 'product': 'product_50', 'purchase_price': None}
{'user_id': 2, 'session_id': '2_596879', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 15, 55), 'product': None, 'purchase_price': None}
{'user_id': 2, 'session_id': '2_596879', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 19, 56), 'product': 'product_34', 'purchase_price': None}
{'user_id': 2, 'session_id': '2_596879', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 24, 55), 'product': None, 'purchase_price': None}
{'user_id': 2, 'session_id': '2_596879', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 29, 43), 'product': 'product_58', 'purchase_price': None}
{'user_id': 2, 'session_id': '2_596879', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 31, 53), 'product': 'product_84', 'purchase_price': None}
{'user_id': 2, 'session_id': '2_596879', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 33, 22), 'product': 'product_20', 'purchase_price': None}
{'user_id': 2, 'session_id': '2_596879', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 37, 6), 'product': 'product_50', 'purchase_price': None}
{'user_id': 2, 'session_id': '2_596879', 'state': 'sale', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 39, 25), 'product': 'product_72', 'purchase_price': 54.06}
{'user_id': 3, 'session_id': '3_347676', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 1, 41), 'product': 'product_58', 'purchase_price': None}
{'user_id': 3, 'session_id': '3_347676', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 3, 33), 'product': None, 'purchase_price': None}
{'user_id': 3, 'session_id': '3_347676', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 7, 58), 'product': 'product_74', 'purchase_price': None}
{'user_id': 3, 'session_id': '3_347676', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 12, 53), 'product': 'product_72', 'purchase_price': None}
{'user_id': 3, 'session_id': '3_347676', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 14, 43), 'product': None, 'purchase_price': None}
{'user_id': 3, 'session_id': '3_347676', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 15, 40), 'product': 'product_71', 'purchase_price': None}
{'user_id': 3, 'session_id': '3_347676', 'state': 'sale', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 18, 15), 'product': 'product_41', 'purchase_price': 94.77}
{'user_id': 4, 'session_id': '4_587160', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 0, 39), 'product': 'product_10', 'purchase_price': None}
{'user_id': 4, 'session_id': '4_587160', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 0, 59), 'product': None, 'purchase_price': None}
{'user_id': 4, 'session_id': '4_587160', 'state': 'sale', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 2, 11), 'product': 'product_54', 'purchase_price': 67.46}
{'user_id': 5, 'session_id': '5_477608', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 2, 18), 'product': 'product_58', 'purchase_price': None}
{'user_id': 5, 'session_id': '5_477608', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 2, 25), 'product': None, 'purchase_price': None}
{'user_id': 5, 'session_id': '5_477608', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 5, 25), 'product': 'product_37', 'purchase_price': None}
{'user_id': 5, 'session_id': '5_477608', 'state': 'sale', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 6, 55), 'product': 'product_40', 'purchase_price': 36.41}
{'user_id': 6, 'session_id': '6_643002', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 4, 9), 'product': 'product_18', 'purchase_price': None}
{'user_id': 6, 'session_id': '6_643002', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 6, 14), 'product': 'product_42', 'purchase_price': None}
{'user_id': 6, 'session_id': '6_643002', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 10, 11), 'product': 'product_55', 'purchase_price': None}
{'user_id': 6, 'session_id': '6_643002', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 12, 29), 'product': None, 'purchase_price': None}
{'user_id': 6, 'session_id': '6_643002', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 14, 43), 'product': 'product_54', 'purchase_price': None}
{'user_id': 6, 'session_id': '6_643002', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 17, 45), 'product': None, 'purchase_price': None}
{'user_id': 6, 'session_id': '6_643002', 'state': 'sale', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 20, 39), 'product': 'product_96', 'purchase_price': 41.68}
{'user_id': 7, 'session_id': '7_110019', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 1, 12), 'product': 'product_77', 'purchase_price': None}
{'user_id': 7, 'session_id': '7_110019', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 4, 33), 'product': None, 'purchase_price': None}
{'user_id': 7, 'session_id': '7_110019', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 7, 41), 'product': 'product_54', 'purchase_price': None}
{'user_id': 7, 'session_id': '7_110019', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 11, 45), 'product': None, 'purchase_price': None}
{'user_id': 7, 'session_id': '7_110019', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 13, 26), 'product': 'product_33', 'purchase_price': None}
{'user_id': 7, 'session_id': '7_110019', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 16, 39), 'product': 'product_75', 'purchase_price': None}
{'user_id': 7, 'session_id': '7_110019', 'state': 'sale', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 17, 45), 'product': 'product_92', 'purchase_price': 45.38}
{'user_id': 8, 'session_id': '8_649806', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 3, 54), 'product': 'product_83', 'purchase_price': None}
{'user_id': 8, 'session_id': '8_649806', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 7, 40), 'product': None, 'purchase_price': None}
{'user_id': 8, 'session_id': '8_649806', 'state': 'sale', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 11, 17), 'product': 'product_100', 'purchase_price': 27.01}
{'user_id': 9, 'session_id': '9_520265', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 1, 39), 'product': None, 'purchase_price': None}
{'user_id': 9, 'session_id': '9_520265', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 4, 41), 'product': 'product_80', 'purchase_price': None}
{'user_id': 9, 'session_id': '9_520265', 'state': 'entry', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 5, 21), 'product': None, 'purchase_price': None}
{'user_id': 9, 'session_id': '9_520265', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 7, 39), 'product': 'product_8', 'purchase_price': None}
{'user_id': 9, 'session_id': '9_520265', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 10, 35), 'product': None, 'purchase_price': None}
{'user_id': 9, 'session_id': '9_520265', 'state': 'add_to_cart', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 11, 22), 'product': 'product_24', 'purchase_price': None}
{'user_id': 9, 'session_id': '9_520265', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 15, 18), 'product': None, 'purchase_price': None}
{'user_id': 9, 'session_id': '9_520265', 'state': 'sale', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 15, 37), 'product': 'product_1', 'purchase_price': 43.21}
{'user_id': 10, 'session_id': '10_857829', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 4, 5), 'product': None, 'purchase_price': None}
{'user_id': 10, 'session_id': '10_857829', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 6, 14), 'product': 'product_2', 'purchase_price': None}
{'user_id': 10, 'session_id': '10_857829', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 8, 32), 'product': None, 'purchase_price': None}
{'user_id': 10, 'session_id': '10_857829', 'state': 'product_view', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 11, 15), 'product': 'product_99', 'purchase_price': None}
{'user_id': 10, 'session_id': '10_857829', 'state': 'check_in', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 16, 4), 'product': None, 'purchase_price': None}
{'user_id': 10, 'session_id': '10_857829', 'state': 'sale', 'time_stamp': datetime.datetime(2023, 1, 1, 0, 16, 21), 'product': 'product_37', 'purchase_price': 87.24}
Can you use the Python Faker library to create a user generator and a product generator, then incorporate these into the above event_generator, adding support for units_sold
and the page_address based on the product? The products should have a category and their price should be drawn from a category-level normal distribution.
Citation
@online{bochman2023,
author = {Bochman, Oren},
title = {Event Generator},
date = {2023-02-16},
url = {https://orenbochman.github.io/posts/2024/2023-03-16-events-generator/},
langid = {en}
}