1. 
Q. Implement a Python-based text preprocessing pipeline for the given natural language data that
performs the following tasks:
    1. Tokenize the input text into words or sentences.
    2. Eliminate stopwords to retain meaningful words in the text.
    3. Perform Part-of-speech (POS) tagging.
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk import pos_tag

# download required NLTK data

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
file_path = '/Users/surajsatheesh/MCA/Third Semester/Advanced AI/end sem practicals/dataset/Text dataset/textfile(1).txt'
with open(file_path, mode='r') as file:
    text = file.read()
print(text)
# sentence tokenization

sentences = sent_tokenize(text)
print("Sentence tokenization:\n")
for sent in sentences:
    print(sent)
# word tokenization
word_tokens = [word_tokenize(sentence) for sentence in sentences]
print("Word tokenization:\n")
for tokens in word_tokens:
    print(tokens)
    print("---------------------\n\n")
#stopwords

stop_words = set(stopwords.words('english'))

filtered_words = []
for tokens in word_tokens:
    sentence_filtered = []
    for word in tokens:
        if word.lower() not in stop_words:
            sentence_filtered.append(word)
    filtered_words.append(sentence_filtered)

print("Filtered words (Stopwords removed: )\n")

for words in filtered_words:
    print(words)
    print("---------\n\n")
# part-of-speech tagging
pos_tags = [pos_tag(tokens) for tokens in filtered_words[:10]]  # tag only the first 10 sentences
print("POS tags:\n")
for toks in pos_tags:
    print(toks)
    print("---------\n\n")

2. 
Q. Implement a Python-based text preprocessing pipeline for the given natural language data that
performs the following tasks:

    1. Correct spelling errors in the text.
    2. Apply stemming and lemmatization to standardize words.
    3. Perform Named Entity Recognition (NER) to identify entities like names, dates, or
       locations in the text.

!python -m spacy download en_core_web_sm
import nltk
from textblob import TextBlob
from nltk.stem import SnowballStemmer, WordNetLemmatizer
import spacy

nltk.download('punkt')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

nlp = spacy.load("en_core_web_sm")
file_path = "/Users/surajsatheesh/MCA/Third Semester/Advanced AI/end sem practicals/dataset/Text dataset/textfile(2).txt"
with open(file_path, mode='r') as file:
    text = file.read()
print(text)
#spell correction
blob = TextBlob(text)
print(blob.correct())
#stemming 
stemmer = SnowballStemmer("english")
words = nltk.word_tokenize(text)
stemmed_words = [stemmer.stem(word) for word in words]
print(" ".join(stemmed_words))
#lemmatization
lemmatizer = WordNetLemmatizer()
words = nltk.word_tokenize(text)
lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
print(' '.join(lemmatized_words))
# named entity recognition (NER)

doc = nlp(text)
entities = [(ent.text, ent.label_) for ent in doc.ents]
for ent in entities:
    print(ent)
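spaCy's entity labels (GPE, ORG, ...) are terse; spacy.explain returns a human-readable description for each. A quick usage sketch with the same doc as above:

# print each entity alongside a plain-English description of its label
for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_))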


3.
Q. Implement a text preprocessing pipeline for the given dataset and perform sentiment analysis
to classify the text as positive, negative, or neutral.
Display the sentiment score and its corresponding sentiment label (positive, negative, or
neutral) for each text sample.
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from nltk.sentiment import SentimentIntensityAnalyzer

# download required NLTK data (VADER lexicon plus stopwords used below)

nltk.download('vader_lexicon')
nltk.download('stopwords')
file_path = "/Users/surajsatheesh/MCA/Third Semester/Advanced AI/end sem practicals/dataset/Sentiment dataset/Sports.csv"
df = pd.read_csv(file_path)
df.head()
#define preprocessing function
def preprocess_text(text):
    #lowercase
    text = text.lower()
    #tokenize and remove stopwords
    stop_words = set(stopwords.words("english"))
    words = text.split()
    words = [word for word in words if word not in stop_words]
    #stemming
    stemmer = SnowballStemmer('english')
    words = [stemmer.stem(word) for word in words]
    #join words back into a string
    return ' '.join(words)

df['cleaned_text'] = df['Text'].apply(preprocess_text)
print(df[["Text", "cleaned_text"]].head())

sia = SentimentIntensityAnalyzer()

#perform sentiment analysis

def get_sentiment(text):
    score = sia.polarity_scores(text)['compound']
    if score > 0.05:
        return score, 'Positive'
    elif score < -0.05:
        return score, 'Negative'
    else:
        return score, 'Neutral'

#apply sentiment analysis

df['sentiment_score'], df['sentiment_label'] = zip(*df['cleaned_text'].apply(get_sentiment))


#print results
for index, row in df.iterrows():
    print(f"Text: {row['Text']}")
    print(f"Sentiment Score: {row['sentiment_score']}")
    print(f"Sentiment Label: {row['sentiment_label']}\n")
import seaborn as sns
import matplotlib.pyplot as plt

sns.countplot(data=df, x='sentiment_label', palette='pastel')
plt.title('Sentiment Distribution')
plt.xlabel('Sentiment')
plt.ylabel('Count')
plt.show()
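One caveat worth noting: VADER is designed for raw text and leans on cues such as negations ("not good"), intensifiers, and punctuation, several of which stopword removal and stemming strip out. Scoring the original column may therefore be more faithful; a minimal sketch for comparison:

# assumption: compare labels when scoring the unprocessed 'Text' column
df['raw_score'], df['raw_label'] = zip(*df['Text'].astype(str).apply(get_sentiment))
print(df[['sentiment_label', 'raw_label']].head())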



4.
Q. Write a program to read an image, convert it to grayscale and perform the following tasks:
Display the Discrete Fourier Transform (DFT) magnitude spectrum.
Display the Discrete Cosine Transform (DCT) magnitude spectrum.

import cv2
import numpy as np
import matplotlib.pyplot as plt

img_path = '/Users/surajsatheesh/MCA/Third Semester/Advanced AI/end sem practicals/dataset/Image/Chihuahua-dog.webp'

#displaying original img
img = cv2.imread(img_path)
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.title("Original Image")
plt.axis("off")
plt.imshow(rgb)
plt.show()
#converting to grayscale
img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
plt.imshow(img, cmap='gray')
plt.title("Grayscale Image")
plt.axis("off")
plt.show()

#DFT magnitude spectrum

dft = np.fft.fft2(img)
dft_shifted = np.fft.fftshift(dft) #shift zero-frequency to the center
magnitude_spectrum = 20 * np.log(np.abs(dft_shifted) + 1)  # log scale for visibility
plt.imshow(magnitude_spectrum, cmap='gray')
plt.title("DFT magnitude spectrum")
plt.axis('off')
plt.show()
# cv2.dct requires even dimensions, so crop to the nearest even size
h, w = img.shape
new_h = h - (h % 2)
new_w = w - (w % 2)
img = img[:new_h, :new_w]
#DCT magnitude spectrum

img_float = np.float32(img)  # convert to float32 for DCT
dct = cv2.dct(img_float)
magnitude_spectrum = 20 * np.log(np.abs(dct) + 1)
plt.imshow(magnitude_spectrum, cmap='gray')
plt.title('DCT magnitude spectrum')
plt.axis('off')
plt.show()
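As a quick sanity check (beyond what the question asks), the DCT is invertible, so the grayscale image can be reconstructed from its coefficients with cv2.idct; a minimal sketch reusing the dct array from above:

# reconstruct the image from its DCT coefficients and display it
reconstructed = cv2.idct(dct)
plt.imshow(np.clip(reconstructed, 0, 255).astype(np.uint8), cmap='gray')
plt.title('Reconstructed from DCT')
plt.axis('off')
plt.show()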

5.
Q. Write a program to read an image, convert it to grayscale and perform the following tasks:

Display the output of the Walsh-Hadamard transformation

Display the output of the slant transformation
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from scipy.ndimage import affine_transform
from scipy.linalg import hadamard
image_path = "/content/image8.jpg"
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# Load and process the image
image = Image.open(image_path)

# Convert to grayscale
grayscale_image = image.convert('L')

# Convert to a NumPy array
image_array = np.array(grayscale_image)

# Define the transformation matrix for the slant effect
# (a geometric shear; note this is an affine warp, distinct from the
# orthogonal slant transform used in image coding)
transformation_matrix = np.array([[1, 0.5, 0], [0, 1, 0]], dtype=float)
full_matrix = np.vstack([transformation_matrix, [0, 0, 1]])

# Apply the affine transformation
transformed_array = affine_transform(image_array, full_matrix[:2, :2], offset=full_matrix[:2, 2], output_shape=image_array.shape)

# Normalize and convert the transformed array to an 8-bit image
transformed_array_normalized = np.clip(transformed_array, 0, 255).astype(np.uint8)
transformed_image = Image.fromarray(transformed_array_normalized)

# --- Walsh-Hadamard Transformation ---
# Generate the Walsh matrix (nearest power of 2 for compatibility)
size = 2 ** int(np.floor(np.log2(min(image_array.shape))))  # largest power of 2 that fits
walsh_matrix = hadamard(size)

# Crop the image to match the Walsh matrix size
cropped_image_array = image_array[:size, :size]

# Apply the 2D Walsh-Hadamard transform: H @ image @ H
walsh_transformed_array = walsh_matrix @ cropped_image_array @ walsh_matrix

# Log-scale the coefficient magnitudes for display (the raw values span a huge
# range, so a straight 0-255 clip would saturate almost everywhere)
walsh_magnitude = np.log(np.abs(walsh_transformed_array) + 1)
walsh_magnitude = (255 * walsh_magnitude / walsh_magnitude.max()).astype(np.uint8)
walsh_transformed_image = Image.fromarray(walsh_magnitude)

# Apply a colormap (e.g., 'viridis')
colormap = plt.get_cmap('viridis')
colored_image_array = colormap(transformed_array_normalized / 255.0)  # Normalize to [0, 1]
colored_image_array = (colored_image_array[:, :, :3] * 255).astype(np.uint8)  # Discard alpha channel and scale to [0, 255]

# Convert the colored slant-transformed array back to an image
colored_slant_image = Image.fromarray(colored_image_array)

# Create a 2x2 grid for the plots
plt.figure(figsize=(12, 12))


# Original image (top left)
plt.subplot(2, 2, 1)
plt.imshow(image)
plt.title("Original Image")
plt.axis('off')

# Grayscale image (top right)
plt.subplot(2, 2, 2)
plt.imshow(grayscale_image, cmap='gray')
plt.title("Grayscale Image")
plt.axis('off')

# Slant transformed image (bottom left)
plt.subplot(2, 2, 3)
plt.imshow(transformed_image, cmap='gray')
plt.title("Slant Transformed Image")
plt.axis('off')



# Walsh-Hadamard transformed image (bottom right)
plt.subplot(2, 2, 4)
plt.imshow(walsh_transformed_image, cmap='gray')
plt.title("Walsh-Hadamard Transformed Image")
plt.axis('off')

# Show the 2x2 grid
plt.show()

# Colored slant image, shown in its own figure
plt.figure(figsize=(6, 6))
plt.imshow(colored_slant_image)
plt.title("Colored Slant Image")
plt.axis('off')
plt.show()
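Since a Hadamard matrix satisfies H·H = n·I, the 2D transform above is invertible by applying H on both sides again and dividing by n squared; a small sketch verifying round-trip recovery of the cropped image:

# H @ H == n * I, so the inverse of H @ X @ H is H @ (.) @ H / n**2
n = walsh_matrix.shape[0]
recovered = walsh_matrix @ walsh_transformed_array @ walsh_matrix / (n ** 2)
print(np.allclose(recovered, cropped_image_array))  # expected: True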

6.
Q. Implement an audio data analysis pipeline to explore and extract fundamental audio features. Use the given audio dataset to perform the following tasks:

Extract the following key audio features:

Amplitude Envelope

Loudness (in decibels)

Visualize these features using appropriate plots

import librosa
import numpy as np
import matplotlib.pyplot as plt
#load audio file

audio_path = '/Users/surajsatheesh/MCA/Third Semester/Advanced AI/end sem practicals/dataset/Audio/Audio(2).mp3'
y, sr = librosa.load(audio_path, sr=None)

# amplitude envelope approximated by frame-wise RMS energy
rms = librosa.feature.rms(y=y)[0]

# loudness (in dB), computed from the RMS values
loudness = librosa.amplitude_to_db(rms)

# create a time vector for the x-axis based on the number of frames and sampling rate
frame_times = librosa.frames_to_time(np.arange(len(rms)), sr=sr)

#visualization
plt.figure(figsize=(10, 6))

#plot amplitude envelope
plt.subplot(2, 1, 1)
plt.plot(frame_times, rms, label='Amplitude Envelope (RMS)', color='b')
plt.title("Amplitude Envelope")
plt.xlabel("Time (s)")
plt.ylabel('Amplitude')
plt.grid(True)
plt.legend()


#plot loudness in dB
plt.subplot(2, 1, 2)
plt.plot(frame_times, loudness, label='Loudness in dB', color='r')
plt.title("Loudness in dB")
plt.xlabel("Time (s)")
plt.ylabel("Loudness")
plt.legend()
plt.tight_layout()
plt.grid(True)
plt.show()
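RMS is a common proxy for the amplitude envelope; if a literal envelope is wanted, the frame-wise peak of |y| can be computed directly. A minimal sketch, assuming the same y and sr and librosa's default frame/hop lengths (2048/512):

# frame the signal and take the maximum absolute amplitude per frame
frame_length, hop_length = 2048, 512
frames = librosa.util.frame(y, frame_length=frame_length, hop_length=hop_length)
amplitude_envelope = np.abs(frames).max(axis=0)
env_times = librosa.frames_to_time(np.arange(len(amplitude_envelope)), sr=sr, hop_length=hop_length)
plt.plot(env_times, amplitude_envelope)
plt.title('Amplitude Envelope (frame-wise peak)')
plt.xlabel('Time (s)')
plt.show()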

7.
Q. Implement an audio data analysis pipeline to explore and extract fundamental audio features. Use the given audio dataset to perform the following tasks:

Extract the following key audio features:

Chroma Features

Mel-Frequency Cepstral Coefficients (MFCC)

Visualize these features using appropriate plots

import librosa
import librosa.display
import matplotlib.pyplot as plt
#load an audio file

audio_path = '/Users/surajsatheesh/MCA/Third Semester/Advanced AI/end sem practicals/dataset/Audio/audio6.mp3'
audio, sr = librosa.load(audio_path)
#extract chroma features

chroma = librosa.feature.chroma_stft(y=audio, sr=sr)

#plot the chroma feature

plt.figure(figsize=(10, 6))
librosa.display.specshow(chroma, y_axis='chroma', x_axis='time', cmap='coolwarm')
plt.title('Chroma Features')
plt.colorbar()
plt.show()
#extract MFCC features 
mfcc = librosa.feature.mfcc(y=audio, sr=sr)

#plot the MFCC features
plt.figure(figsize=(10, 6))
librosa.display.specshow(mfcc, x_axis='time', sr=sr, cmap='coolwarm')
plt.title('MFCC')
plt.colorbar()
plt.show()
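MFCCs are DCT coefficients of a log-mel spectrogram, so the same result can be built in two explicit steps, which also exposes tunable parameters such as n_mels and n_mfcc (the values below are illustrative):

# compute a mel spectrogram, convert to dB, then derive MFCCs from it
S = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)
log_S = librosa.power_to_db(S)
mfcc_from_mel = librosa.feature.mfcc(S=log_S, sr=sr, n_mfcc=20)
print(mfcc_from_mel.shape)  # (n_mfcc, n_frames)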

8.
Q. Implement AI-based search algorithms using NumPy and TensorFlow to solve a pathfinding problem. Your task involves the following steps:

Implement the DFS algorithm using NumPy to find a path from the start node to the goal node.

Analyze and report the time complexity and space complexity of DFS for this implementation.
import numpy as np
import tensorflow as tf

# Graph represented as an adjacency matrix (NxN)
# 1 indicates an edge between nodes, 0 means no edge.
# Example graph:
# Node 0 connects to 1 and 2
# Node 1 connects to 0 and 3
# Node 2 connects to 0
# Node 3 connects to 1

graph = np.array([
    [0, 1, 1, 0],  # Node 0
    [1, 0, 0, 1],  # Node 1
    [1, 0, 0, 0],  # Node 2
    [0, 1, 0, 0]   # Node 3
])

# Initialize a tensor to keep track of visited nodes on CPU
with tf.device('/CPU:0'):
    visited = tf.Variable(np.zeros(graph.shape[0]), dtype=tf.float32)

# DFS function with TensorFlow support
def dfs(graph, node, visited):
    # Mark the current node as visited
    visited[node].assign(1)
    print(f"Visiting node {node}")

    # Loop through all neighbors of the current node
    for neighbor in range(graph.shape[0]):
        if graph[node, neighbor] == 1 and visited[neighbor].numpy() == 0:  # Unvisited neighbor
            print(f"Node {node} -> Going to node {neighbor}")
            dfs(graph, neighbor, visited)

    # Backtracking: all neighbors processed; return to the caller
    # (the node stays marked so each node is visited at most once)
    print(f"Backtracking from node {node}")

# Start DFS from node 0, using the CPU device
with tf.device('/CPU:0'):
    # Start DFS from node 0
    dfs(graph, 0, visited)

Time Complexity:

DFS explores all nodes and edges in the worst case. Thus, the time complexity is O(V + E), where V is the number of vertices (nodes) and E is the number of edges in the graph.
In the example, V = 4 and E = 3.

Space Complexity:

Visited Nodes: We use an array of size V to track which nodes have been visited, which takes O(V) space.
Recursion Stack: The depth of recursion can go as deep as V in the worst case, so the space complexity due to the recursion stack is also O(V).
Thus, the overall space complexity is O(V).
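
The recursive version above only traverses the graph; since the problem statement asks for a path from a start node to a goal node, here is a minimal iterative sketch using only NumPy with an explicit stack, which also makes the O(V) stack-space bound concrete. The goal node 3 is an assumption chosen for illustration:

def dfs_path(graph, start, goal):
    # explicit stack of (node, path-so-far); the visited array caps revisits
    visited = np.zeros(graph.shape[0], dtype=bool)
    stack = [(start, [start])]
    while stack:
        node, path = stack.pop()
        if node == goal:
            return path
        if visited[node]:
            continue
        visited[node] = True
        for neighbor in range(graph.shape[0]):
            if graph[node, neighbor] == 1 and not visited[neighbor]:
                stack.append((neighbor, path + [neighbor]))
    return None  # no path exists

print(dfs_path(graph, 0, 3))  # e.g. [0, 1, 3]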