Example 2: Sentiment Analysis with RNN: Classifying Movie Reviews Using PyTorch
In this example, we will use a public dataset to perform sentiment analysis on reviews. The goal is to classify each review as positive or negative using an RNN.
Step-by-Step Implementation:
Step 1: Import Libraries
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
Step 2: Load and Preprocess the Dataset
We will load a labeled text dataset directly from a URL and preprocess it. Note that the URL below actually points to the SMS Spam Collection (labels 'ham' and 'spam') rather than an IMDB movie-review corpus; the same binary text-classification pipeline applies either way.
# Load dataset
url = "https://raw.githubusercontent.com/justmarkham/DAT8/master/data/sms.tsv"
df = pd.read_csv(url, delimiter='\t', header=None, names=['label', 'text'])
# Preprocess dataset
def preprocess_text(text):
    return text.lower().split()
df['text'] = df['text'].apply(preprocess_text)
df = df[['text', 'label']]
# Encode labels
le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])
# Split dataset
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)
train_data, test_data = train_data.copy(), test_data.copy()  # independent copies so the column assignments below do not raise SettingWithCopyWarning
# Vocabulary and indexing (index 0 is reserved for the padding token;
# for simplicity the vocabulary is built on the full dataset)
vocab = set(word for phrase in df['text'] for word in phrase)
word_to_idx = {word: idx for idx, word in enumerate(vocab, 1)}
def encode_phrase(phrase):
    return [word_to_idx[word] for word in phrase]
train_data['text'] = train_data['text'].apply(encode_phrase)
test_data['text'] = test_data['text'].apply(encode_phrase)
# Padding sequences
max_length = max(df['text'].apply(len))
def pad_sequence(seq, max_length):
    return seq + [0] * (max_length - len(seq))
train_data['text'] = train_data['text'].apply(lambda x: pad_sequence(x, max_length))
test_data['text'] = test_data['text'].apply(lambda x: pad_sequence(x, max_length))
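Before moving on, it can help to spot-check what the preprocessing produced. The snippet below is a quick illustrative check (not part of the original pipeline): it prints the padded length and the first few token indices of one training example.
# Illustrative sanity check: every encoded review should now have length max_length
sample = train_data['text'].iloc[0]
print(len(sample), sample[:10])  # prints max_length and the first ten token indices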
Step 3: Create Dataset and DataLoader
class SentimentDataset(Dataset):
    def __init__(self, data):
        self.texts = data['text'].values
        self.labels = data['label'].values

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        return torch.tensor(text, dtype=torch.long), torch.tensor(label, dtype=torch.long)
train_dataset = SentimentDataset(train_data)
test_dataset = SentimentDataset(test_data)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
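As a quick sanity check (an optional sketch, using the batch_size of 32 configured above), pulling a single batch from the loader confirms the tensor shapes the model will receive:
texts, labels = next(iter(train_loader))
print(texts.shape)   # torch.Size([32, max_length]) for a full batch -- token indices
print(labels.shape)  # torch.Size([32]) -- one class label per example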
Step 4: Define the RNN Model
The SentimentRNN is a neural network designed to read a sequence of words and decide whether the text belongs to the positive or negative class. It embeds each token, feeds the embedded sequence through a recurrent layer that carries a hidden state across time steps, and classifies from the hidden state at the final time step.
class SentimentRNN(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, output_size):
        super(SentimentRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.RNN(embed_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.embedding(x)  # (batch, seq_len, embed_size)
        h0 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)  # initial hidden state
        out, _ = self.rnn(x, h0)  # (batch, seq_len, hidden_size)
        out = self.fc(out[:, -1, :])  # classify from the last time step
        return out
vocab_size = len(vocab) + 1  # +1 for the padding index 0
embed_size = 128
hidden_size = 128
output_size = 2  # for binary classification
model = SentimentRNN(vocab_size, embed_size, hidden_size, output_size)
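A quick forward pass on dummy input (illustrative only) verifies that the model produces one logit per class before any training happens:
# All-zero input is valid here because index 0 is the padding token
dummy = torch.zeros(4, max_length, dtype=torch.long)
print(model(dummy).shape)  # expected: torch.Size([4, 2])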
Step 5: Train the Model
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for texts, labels in train_loader:
        outputs = model(texts)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss / len(train_loader):.4f}')
Output:
Epoch [1/10], Loss: 0.4016
Epoch [2/10], Loss: 0.3999
Epoch [3/10], Loss: 0.4004
Epoch [4/10], Loss: 0.3954
Epoch [5/10], Loss: 0.3969
Epoch [6/10], Loss: 0.3978
Epoch [7/10], Loss: 0.3960
Epoch [8/10], Loss: 0.3959
Epoch [9/10], Loss: 0.3967
Epoch [10/10], Loss: 0.3953
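If you want to reuse the trained weights later, the standard PyTorch pattern is to save the model's state dict (the filename below is arbitrary, chosen here for illustration):
torch.save(model.state_dict(), 'sentiment_rnn.pt')
# Later: model.load_state_dict(torch.load('sentiment_rnn.pt'))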
Step 6: Evaluate the Model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for texts, labels in test_loader:
        outputs = model(texts)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
print(f'Accuracy: {accuracy:.2f}%')
Output:
Accuracy: 86.64%
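To classify a new, unseen piece of text, the same preprocessing must be applied before calling the model. The helper below is a sketch: it assumes (a choice made here for illustration) that words missing from the training vocabulary are simply dropped, since encode_phrase above would raise a KeyError on them.
def predict(text):
    tokens = preprocess_text(text)
    idxs = [word_to_idx[w] for w in tokens if w in word_to_idx]  # drop unknown words
    idxs = pad_sequence(idxs, max_length)
    model.eval()
    with torch.no_grad():
        logits = model(torch.tensor([idxs], dtype=torch.long))
    return le.inverse_transform([logits.argmax(dim=1).item()])[0]

print(predict("I really enjoyed this"))  # prints one of the two original label names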
Step 7: Visualize Training Loss
Note that this loop trains the already-trained model for ten more epochs while recording the average loss per epoch; in practice you would record the losses inside the Step 5 training loop instead of running a second one.
losses = []
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for texts, labels in train_loader:
        outputs = model(texts)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    losses.append(epoch_loss / len(train_loader))
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss / len(train_loader):.4f}')
# Plot training loss
plt.figure(figsize=(10, 6))
plt.plot(range(1, num_epochs + 1), losses, marker='o')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss')
plt.show()
Output:
Epoch [1/10], Loss: 0.3946
Epoch [2/10], Loss: 0.3990
Epoch [3/10], Loss: 0.3968
Epoch [4/10], Loss: 0.3988
Epoch [5/10], Loss: 0.3949
Epoch [6/10], Loss: 0.3983
Epoch [7/10], Loss: 0.3997
Epoch [8/10], Loss: 0.3991
Epoch [9/10], Loss: 0.3991
Epoch [10/10], Loss: 0.3956
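The per-epoch loss barely moves in the logs above, which is common for a plain nn.RNN on long padded sequences. A frequently used variation (shown here only as a sketch, not as part of the original example) is to swap the recurrent layer for an LSTM, which typically handles longer-range dependencies better:
class SentimentLSTM(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, output_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size, padding_idx=0)
        self.lstm = nn.LSTM(embed_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.embedding(x)
        out, _ = self.lstm(x)  # initial hidden/cell states default to zeros
        return self.fc(out[:, -1, :])

# Drop-in replacement: model = SentimentLSTM(vocab_size, embed_size, hidden_size, output_size)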
We have used two examples to show how to create Recurrent Neural Networks with PyTorch.
- In the first example, an RNN was used to predict the next value in a sequence on a synthetic sine-wave dataset.
- In this second example, an RNN classified text into one of two classes, the same setup used for sentiment analysis.
- By following these steps, you can gain a strong understanding of RNNs and their applications to a range of tasks.