import pandas as pd
import string
# Toy corpus: four short example tweets, kept in their raw (uncleaned) form.
raw_tweets = [
    "I am happy because I am learning NLP",
    "I am sad, I am not learning NLP",
    "I am happy, not sad",
    "I am sad, not happy",
]
# Translation table that deletes every character in string.punctuation; built
# once at import time so clean() does not rebuild it on each call.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def clean(tweet: str) -> str:
    """Return *tweet* lower-cased with ASCII punctuation removed.

    Only the characters listed in ``string.punctuation`` are deleted;
    whitespace and all other characters are kept as they are.

    Args:
        tweet: Raw tweet text.

    Returns:
        The tweet with punctuation stripped and letters lower-cased.
    """
    return tweet.translate(_PUNCTUATION_TABLE).lower()
# Normalize every raw tweet, then pair each one by position with its label
# ('+' or '-') in a two-column DataFrame.
tweets = list(map(clean, raw_tweets))
labels = ["+", "-", "+", "-"]
df = pd.DataFrame({"tweets": tweets, "labels": labels})
df

| | tweets | labels |
|---|---|---|
| 0 | i am happy because i am learning nlp | + |
| 1 | i am sad i am not learning nlp | - |
| 2 | i am happy not sad | + |
| 3 | i am sad not happy | - |
