import pandas as pd
import string
# Small hand-written corpus used as raw input for the cleaning step below.
raw_tweets = [
    "I am happy because I am learning NLP",
    "I am sad, I am not learning NLP",
    "I am happy, not sad",
    "I am sad, not happy",
]

# Built once at import time so clean() does not rebuild the table per call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def clean(tweet: str) -> str:
    """Return *tweet* lower-cased with punctuation characters removed.

    Only the characters listed in ``string.punctuation`` (ASCII punctuation)
    are stripped; whitespace and every other character are kept as they are.

    Args:
        tweet: The raw text to normalize.

    Returns:
        The normalized text. An empty string yields an empty string.
    """
    return tweet.translate(_PUNCTUATION_TABLE).lower()


# Normalized text, in the same order as raw_tweets.
tweets = [clean(tweet) for tweet in raw_tweets]

# One label per tweet, positionally aligned with `tweets`.
# NOTE(review): '+' / '-' presumably mark positive / negative sentiment — confirm.
labels = ['+', '-', '+', '-']

df = pd.DataFrame({'tweets': tweets, 'labels': labels})

# Bare expression: renders the frame when run as the last line of a notebook
# cell; it has no effect when the file is executed as a plain script.
df
|   | tweets | labels |
|---|---|---|
| 0 | i am happy because i am learning nlp | + |
| 1 | i am sad i am not learning nlp | - |
| 2 | i am happy not sad | + |
| 3 | i am sad not happy | - |