import pandas as pd
import string 
# Toy corpus: four short example tweets, kept in their raw (uncleaned) form.
raw_tweets = [
    "I am happy because I am learning NLP",
    "I am sad, I am not learning NLP",
    "I am happy, not sad",
    "I am sad, not happy",
]
# Translation table that deletes every character in ``string.punctuation``.
# Built once at import time rather than on every call to ``clean``.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def clean(tweet: str) -> str:
    """Return *tweet* lower-cased with ASCII punctuation removed.

    Each character listed in ``string.punctuation`` is deleted outright (it is
    not replaced by a space), so ``"sad,not"`` becomes ``"sadnot"``. All other
    characters, including whitespace, are kept and lower-cased.

    Args:
        tweet: Raw tweet text.

    Returns:
        The cleaned, lower-cased text.
    """
    return tweet.translate(_PUNCTUATION_TABLE).lower()
# Clean every raw tweet, then pair each one with its label in a two-column
# DataFrame. Labels are listed in the same order as ``raw_tweets``
# (presumably '+' marks positive and '-' negative sentiment).
tweets = list(map(clean, raw_tweets))
labels = ['+', '-', '+', '-']
df = pd.DataFrame(data={'tweets': tweets, 'labels': labels})
df
# Output:
# |   | tweets | labels |
# |---|---|---|
# | 0 | i am happy because i am learning nlp | + |
# | 1 | i am sad i am not learning nlp | - |
# | 2 | i am happy not sad | + |
# | 3 | i am sad not happy | - |
