Stopword 제거와 Lemmatization까지 적용된 Dataset
BERT 벡터 임베딩이 추가된 Dataset
아래는 임베딩 벡터만 있는 csv
Doc2Vec 데이터셋
import requests
from os import path
import pandas as pd
# Source: download the dataset CSV once, store it at data_path, and load it.
data_path = './dataset_210626_215600.csv'
if not path.exists(data_path):
    # Plain URL: the angle brackets that wrapped the original literal are
    # markdown autolink syntax, not part of the address.
    url = 'https://kyuuuw-nlp-dataset.s3.ap-northeast-2.amazonaws.com/fragrantica/dataset_210626_215600.csv'
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, allow_redirects=True, timeout=60)
    # Fail loudly on an HTTP error; otherwise the error body would be written
    # to data_path and reused as the "dataset" on every later run.
    r.raise_for_status()
    # The context manager flushes and closes the file before it is read back.
    with open(data_path, 'wb') as f:
        f.write(r.content)
data = pd.read_csv(data_path)