Information Retrieval

Harshwardhan Katke

PRACTICAL 1

Aim: - Document Indexing and Retrieval 

a) Implement an inverted index construction algorithm. 

b) Build a simple document retrieval system using the constructed index.


a) Implement an inverted index construction algorithm. 

Code: from collections import defaultdict

# Function to build the inverted index
def build_inverted_index(documents):
    inverted_index = defaultdict(list)
    # Iterate over documents
    for doc_id, document in enumerate(documents):
        words = document.lower().split()  # Convert to lowercase and split into words
        for word in set(words):  # Use set to avoid duplicates in a document
            inverted_index[word].append(doc_id)
    return inverted_index

# Sample documents
documents = [
    "Information retrieval is important.",
    "Document indexing is part of retrieval.",
    "Retrieval systems need an inverted index."
]

# Build the inverted index
inverted_index = build_inverted_index(documents)

# Print the inverted index
for word, doc_ids in inverted_index.items():
    print(f"{word}: {doc_ids}")



OUTPUT: information: [0]
retrieval: [0, 2]
is: [0, 1]
important.: [0]
part: [1]
of: [1]
document: [1]
retrieval.: [1]
indexing: [1]
index.: [2]
an: [2]
need: [2]
systems: [2]
inverted: [2]


b) Build a simple document retrieval system using the constructed index. 

Code: from collections import defaultdict

# Function to build the inverted index
def build_inverted_index(documents):
    inverted_index = defaultdict(list)
    # Iterate over documents
    for doc_id, document in enumerate(documents):
        words = document.lower().split()  # Convert to lowercase and split into words
        for word in set(words):  # Use set to avoid duplicates in a document
            inverted_index[word].append(doc_id)
    return inverted_index

# Function to retrieve documents for a query
def retrieve_documents(query, inverted_index):
    query = query.lower().split()  # Tokenize the query and convert to lowercase
    doc_ids = set()
    # Find documents for each word in the query
    for word in query:
        if word in inverted_index:
            doc_ids.update(inverted_index[word])
    return sorted(doc_ids)

# Sample documents
documents = [
    "Information retrieval is important.",
    "Document indexing is part of retrieval.",
    "Retrieval systems need an inverted index."
]

# Build the inverted index
inverted_index = build_inverted_index(documents)

# Display the inverted index
print("Inverted Index:")
for word, doc_ids in inverted_index.items():
    print(f"{word}: {doc_ids}")

# Search query example
query = "retrieval index"
retrieved_docs = retrieve_documents(query, inverted_index)

# Display the retrieved documents (IDs)
print(f"\nDocuments matching the query '{query}': {retrieved_docs}")


OUTPUT:

Inverted Index:
important.: [0]
information: [0]
is: [0, 1]
retrieval: [0, 2]
retrieval.: [1]
document: [1]
indexing: [1]
part: [1]
of: [1]
need: [2]
index.: [2]
an: [2]
inverted: [2]
systems: [2]

Documents matching the query 'retrieval index': [0, 2]
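Note that retrieve_documents returns any document containing at least one query term (OR semantics). That is why 'retrieval index' matches documents 0 and 2: only the term "retrieval" is found in the index (document 2 stores "index." with its trailing period), and every document containing it is returned. A minimal AND-style variant is sketched below; it reuses the inverted_index built above, and the example query is only illustrative.

def retrieve_documents_and(query, inverted_index):
    # Return only documents that contain every query term (AND semantics)
    doc_sets = [set(inverted_index[word]) for word in query.lower().split()
                if word in inverted_index]
    return sorted(set.intersection(*doc_sets)) if doc_sets else []

print(retrieve_documents_and("retrieval is", inverted_index))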



PRACTICAL 2 

Aim:- Retrieval Models 

a) Implement the Boolean retrieval model and process queries. 

b) Implement the vector space model with TF-IDF weighting and cosine similarity.


a) Implement the Boolean retrieval model and process queries.

CODE: from collections import defaultdict

# Build the inverted index
def build_index(docs):
    index = defaultdict(set)
    for i, doc in enumerate(docs):
        for word in doc.lower().split():
            index[word].add(i)
    return index

# Process Boolean query (AND, OR, NOT)
def process_query(query, index):
    query = query.lower().split()
    if 'and' in query:
        terms = query[::2]
        result = index[terms[0]] & index[terms[1]]
    elif 'or' in query:
        terms = query[::2]
        result = index[terms[0]] | index[terms[1]]
    elif 'not' in query:
        terms = query[::2]
        result = index[terms[0]] - index[terms[1]]
    else:
        result = index[query[0]]
    return sorted(result)

# Sample documents
documents = [
    "Information retrieval is important",
    "Document indexing and retrieval help search engines",
    "Retrieval systems are useful for data analysis"
]

# Build inverted index
index = build_index(documents)

# Sample query
query = "information and retrieval"
print(f"Result for query '{query}': {process_query(query, index)}")


OUTPUT:

Result for query 'information and retrieval': [0]
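process_query above handles only single-operator, two-term queries. A small extension sketch, assuming the query alternates term/operator/term and reusing the index built above, folds longer queries from left to right:

def process_multi_query(query, index):
    # Evaluate alternating term/operator sequences left to right
    tokens = query.lower().split()
    result = set(index.get(tokens[0], set()))
    for op, term in zip(tokens[1::2], tokens[2::2]):
        postings = set(index.get(term, set()))
        if op == 'and':
            result &= postings
        elif op == 'or':
            result |= postings
        elif op == 'not':
            result -= postings
    return sorted(result)

print(process_multi_query("retrieval and systems or indexing", index))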



b) Implement the vector space model with TF-IDF weighting and cosine similarity.

CODE: import math
from collections import Counter

# Compute Term Frequency (TF)
def compute_tf(doc):
    tf = Counter(doc)  # Count occurrences of each word
    total_terms = len(doc)  # Total number of words in the document
    return {word: count / total_terms for word, count in tf.items()}

# Compute Inverse Document Frequency (IDF)
def compute_idf(docs):
    total_docs = len(docs)
    idf = {}
    all_words = set(word for doc in docs for word in doc)  # Unique words across all documents
    for word in all_words:
        doc_freq = sum(1 for doc in docs if word in doc)  # Count docs containing the word
        idf[word] = math.log(total_docs / (doc_freq + 1))  # Avoid division by zero
    return idf

# Compute TF-IDF for each document
def compute_tfidf(docs, idf):
    tfidf_docs = []
    for doc in docs:
        tf = compute_tf(doc)  # Get TF of the current document
        tfidf = {word: tf.get(word, 0) * idf.get(word, 0) for word in doc}  # TF-IDF calculation
        tfidf_docs.append(tfidf)
    return tfidf_docs

# Compute Cosine Similarity between two vectors
def cosine_similarity(vec1, vec2):
    words = set(vec1) | set(vec2)  # Combine words from both vectors
    dot_product = sum(vec1.get(word, 0) * vec2.get(word, 0) for word in words)  # Dot product
    magnitude1 = math.sqrt(sum(val**2 for val in vec1.values()))  # Magnitude of vec1
    magnitude2 = math.sqrt(sum(val**2 for val in vec2.values()))  # Magnitude of vec2
    if magnitude1 and magnitude2:
        return dot_product / (magnitude1 * magnitude2)  # Cosine similarity formula
    return 0

# Sample documents (list of words per document)
docs = [
    ["informaton", "retrieval", "is", "important"],
    ["document", "indexing", "and", "retrieval"],
    ["search", "engines", "use", "informaton"]
]

# Compute IDF and TF-IDF
idf = compute_idf(docs)
tfidf_docs = compute_tfidf(docs, idf)

# Debugging print: To check TF-IDF values
print("TF-IDF Vectors for Documents:")
for i, tfidf in enumerate(tfidf_docs):
    print(f"Document {i}: {tfidf}")

# Calculate cosine similarity between Document 0 and Document 1
similarity = cosine_similarity(tfidf_docs[0], tfidf_docs[1])
print(f"Cosine Similarity between Document 0 and Document 1: {similarity:.4f}")

OUTPUT:
TF-IDF Vectors for Documents:
Document 0: {'information': 0.14384103622589045, 'retrieval': 0.0, 'is': 0.2876820724517809, 'important': 0.2876820724517809}
Document 1: {'document': 0.2876820724517809, 'indexing': 0.2876820724517809, 'and': 0.2876820724517809, 'retrieval': 0.0}
Document 2: {'search': 0.2876820724517809, 'engines': 0.2876820724517809, 'use': 0.2876820724517809, 'information': 0.14384103622589045}

Cosine Similarity between Document 0 and Document 1: 0.0000
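The similarity of 0.0000 is expected here: "retrieval" is the only term shared by Document 0 and Document 1, and because it appears in two of the three documents its IDF, log(3 / (2 + 1)), is zero, so it contributes nothing to the dot product. For comparison, a minimal sketch using scikit-learn (the library also used in Practical 4b, assumed installed) is shown below; its smoothed IDF and L2 normalisation will give different numbers from the hand-written version above.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Illustrative corpus mirroring the documents above
texts = ["information retrieval is important",
         "document indexing and retrieval",
         "search engines use information"]
tfidf_matrix = TfidfVectorizer().fit_transform(texts)            # one row per document
sim = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]  # Document 0 vs Document 1
print(f"Cosine Similarity (scikit-learn): {sim:.4f}")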



                                                PRACTICAL 3 

Aim: - Spelling Correction in IR Systems 

a) Develop a spelling correction module using edit distance algorithms. 
b) Integrate the spelling correction module into an information retrieval system.


a) Develop a spelling correction module using edit distance algorithms. 
Code: import difflib

def correct_spelling(word, dictionary):
    suggestion = difflib.get_close_matches(word, dictionary, n=1, cutoff=0.7)
    return suggestion[0] if suggestion else word

# Example dictionary
dictionary = ["information", "retrieval", "search", "query", "system", "index"]

# Test the function
misspelled_word = "retrievel"
corrected_word = correct_spelling(misspelled_word, dictionary)
print(f"Did you mean: {corrected_word}?")

OUTPUT: Did you mean: retrieval?
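difflib.get_close_matches ranks candidates with SequenceMatcher similarity rather than an explicit edit distance. Since the aim mentions edit distance algorithms, a minimal Levenshtein distance sketch is given below; edit_distance and correct_with_edit_distance are illustrative names, not part of the module above.

def edit_distance(a, b):
    # Classic dynamic-programming Levenshtein distance
    dp = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
    for i in range(len(a) + 1):
        dp[i][0] = i
    for j in range(len(b) + 1):
        dp[0][j] = j
    for i in range(1, len(a) + 1):
        for j in range(1, len(b) + 1):
            cost = 0 if a[i - 1] == b[j - 1] else 1
            dp[i][j] = min(dp[i - 1][j] + 1,         # deletion
                           dp[i][j - 1] + 1,         # insertion
                           dp[i - 1][j - 1] + cost)  # substitution
    return dp[len(a)][len(b)]

def correct_with_edit_distance(word, dictionary):
    # Pick the dictionary word with the smallest edit distance to the input
    return min(dictionary, key=lambda w: edit_distance(word, w))

print(correct_with_edit_distance("retrievel", ["information", "retrieval", "search", "query", "system", "index"]))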

b) Integrate the spelling correction module into an information retrieval system. 

Code: import difflib

# Spelling correction function
def correct_spelling(word, dictionary):
    suggestion = difflib.get_close_matches(word, dictionary, n=1, cutoff=0.7)
    return suggestion[0] if suggestion else word

# Simple IR system with a small document collection
documents = {
    "doc1": "Information retrieval is important.",
    "doc2": "Spelling correction improves search results.",
    "doc3": "Query processing includes spell checking."
}

# Search function with spell correction
def search(query, dictionary, documents):
    words = query.split()
    corrected_query = " ".join(correct_spelling(word, dictionary) for word in words)
    print(f"Corrected Query: {corrected_query}")

    # Retrieve matching documents
    results = [doc for doc, content in documents.items() if corrected_query in content]
    return results if results else ["No relevant documents found."]

# Example dictionary
dictionary = ["information", "retrieval", "spelling", "correction", "query", "processing", "search", "results"]

# Test the system
query = "retrievel system"
results = search(query, dictionary, documents)
print("Search Results:", results)

OUTPUT: Corrected Query: retrieval system
Search Results: ['No relevant documents found.']
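No document is returned because search looks for the whole corrected query as an exact, case-sensitive substring, and none of the documents contain the phrase "retrieval system". A possible refinement, sketched below reusing correct_spelling and documents from the code above, matches documents on individual corrected words instead:

def search_by_words(query, dictionary, documents):
    # Correct each query word, then return documents containing any corrected word
    corrected = [correct_spelling(w, dictionary) for w in query.lower().split()]
    results = [doc for doc, content in documents.items()
               if any(word in content.lower() for word in corrected)]
    return results if results else ["No relevant documents found."]

print("Word-level results:", search_by_words("retrievel system", dictionary, documents))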


PRACTICAL 4 
Aim: - Evaluation Metrics for IR Systems 

a) Calculate precision, recall, and F-measure for a given set of retrieval results. 
b) Use an evaluation toolkit to measure average precision and other evaluation metrics.

a) Calculate precision, recall, and F-measure for a given set of retrieval results. 
Code: def evaluate_ir(retrieved, relevant):
    retrieved_set = set(retrieved)
    relevant_set = set(relevant)

    true_positives = len(retrieved_set & relevant_set)  # Relevant and retrieved
    precision = true_positives / len(retrieved) if retrieved else 0
    recall = true_positives / len(relevant) if relevant else 0
    f_measure = (2 * precision * recall) / (precision + recall) if (precision + recall) else 0

    return precision, recall, f_measure

# Example data
retrieved_docs = ["doc1", "doc2", "doc3", "doc4"]
relevant_docs = ["doc2", "doc3", "doc5"]

# Calculate metrics
precision, recall, f_measure = evaluate_ir(retrieved_docs, relevant_docs)
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F-Measure: {f_measure:.2f}")


OUTPUT: Precision: 0.50
                     Recall: 0.67
                 F-Measure: 0.57
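Here the retrieved set {doc1, doc2, doc3, doc4} shares two documents (doc2 and doc3) with the relevant set {doc2, doc3, doc5}, so Precision = 2/4 = 0.50, Recall = 2/3 ≈ 0.67, and F-Measure = (2 × 0.50 × 0.67) / (0.50 + 0.67) ≈ 0.57.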


b) Use an evaluation toolkit to measure average precision and other evaluation metrics.

Code: from sklearn.metrics import average_precision_score
# Example relevance scores (1 = relevant, 0 = not relevant)
y_true = [1, 0, 1, 1, 0, 1] # Ground truth relevance
y_scores = [0.9, 0.2, 0.8, 0.7, 0.3, 0.6] # Retrieval system scores
# Calculate Average Precision
ap = average_precision_score(y_true, y_scores)

print(f"Average Precision (AP): {ap:.2f}") 

OUTPUT: Average Precision (AP): 1.00
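This result is expected for the sample data: sorting the documents by the system scores places all four relevant documents (scores 0.9, 0.8, 0.7 and 0.6) above the two non-relevant ones, so precision is 1.0 at every relevant rank and the average precision is exactly 1.00. With scores that rank a non-relevant document above a relevant one, AP would drop below 1.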


PRACTICAL 5 
Aim: - Text Categorization 

a) Implement a text classification algorithm (e.g., Naive Bayes or Support Vector Machines). 
b) Train the classifier on a labelled dataset and evaluate its performance 

a) Implement a text classification algorithm (e.g., Naive Bayes or Support Vector Machines). 
Code: # Sample dataset: text and labels
data = [
    ("I love this movie", "positive"),
    ("This film is terrible", "negative"),
    ("Amazing story", "positive"),
    ("Worst movie ever", "negative")
]

# Training: Count word occurrences per class
word_counts = {"positive": {}, "negative": {}}
label_counts = {"positive": 0, "negative": 0}

for text, label in data:
    words = text.lower().split()
    label_counts[label] += 1
    for word in words:
        word_counts[label][word] = word_counts[label].get(word, 0) + 1

# Simple classification function
def classify(text):
    words = text.lower().split()
    scores = {
        "positive": label_counts["positive"],
        "negative": label_counts["negative"]
    }

    for label in scores:
        for word in words:
            scores[label] += word_counts[label].get(word, 0)

    return max(scores, key=scores.get)

# Test the classifier
test_text = "Amazing movie"
predicted_label = classify(test_text)
print(f"Predicted Label: {predicted_label}")

OUTPUT: Predicted Label: positive
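The classifier above scores a class by adding its document count and raw word counts, which is a simplification rather than actual Naive Bayes. A minimal multinomial Naive Bayes sketch with Laplace smoothing, trained on the same four sentences, might look like this (train_nb and classify_nb are illustrative names):

import math

def train_nb(data):
    # Count documents per class and word occurrences per class
    word_counts = {"positive": {}, "negative": {}}
    label_counts = {"positive": 0, "negative": 0}
    vocab = set()
    for text, label in data:
        label_counts[label] += 1
        for word in text.lower().split():
            word_counts[label][word] = word_counts[label].get(word, 0) + 1
            vocab.add(word)
    return word_counts, label_counts, vocab

def classify_nb(text, word_counts, label_counts, vocab):
    total_docs = sum(label_counts.values())
    scores = {}
    for label in label_counts:
        score = math.log(label_counts[label] / total_docs)  # log prior
        total_words = sum(word_counts[label].values())
        for word in text.lower().split():
            count = word_counts[label].get(word, 0)
            # Laplace-smoothed log likelihood
            score += math.log((count + 1) / (total_words + len(vocab)))
        scores[label] = score
    return max(scores, key=scores.get)

data = [
    ("I love this movie", "positive"),
    ("This film is terrible", "negative"),
    ("Amazing story", "positive"),
    ("Worst movie ever", "negative")
]
wc, lc, vocab = train_nb(data)
print("Naive Bayes prediction:", classify_nb("Amazing movie", wc, lc, vocab))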


b) Train the classifier on a labelled dataset and evaluate its performance. 
Code: # Define positive and negative words manually
positive_words = {"good", "great", "excellent", "amazing", "fantastic", "delicious", "love", "best"}
negative_words = {"bad", "terrible", "worst", "awful", "hate", "horrible", "disgusting", "never"}

# Sample dataset: (text, label)
data = [
    ("The food was delicious", "positive"),
    ("I hated the taste", "negative"),
    ("Service was excellent", "positive"),
    ("It was a terrible experience", "negative"),
    ("The meal was fantastic", "positive"),
    ("I will never come back", "negative")
]

# Split into training and testing sets
train_data = data[:5]  # First 5 for training
test_data = data[5:]   # Last 1 for testing

# Simple keyword-based classification function
def classify(text):
    words = set(text.lower().split())  # Convert text into a set of words
    pos_count = len(words & positive_words)  # Count matching positive words
    neg_count = len(words & negative_words)  # Count matching negative words

    if pos_count > neg_count:
        return "positive"
    elif neg_count > pos_count:
        return "negative"
    else:
        return "neutral"  # If no match, classify as neutral

# Evaluate accuracy
correct = 0
for text, actual_label in test_data:
    predicted_label = classify(text)
    print(f"Text: '{text}' → Predicted: {predicted_label}, Actual: {actual_label}")
    if predicted_label == actual_label:
        correct += 1

accuracy = correct / len(test_data) * 100
print(f"Accuracy: {accuracy:.2f}%")

OUTPUT: Text: 'I will never come back' → Predicted: negative, Actual: negative
Accuracy: 100.00%


PRACTICAL 6 
Aim: - Clustering for Information Retrieval 

a) Implement a clustering algorithm (e.g., K-means or hierarchical clustering). 
b) Apply the clustering algorithm to a set of documents and evaluate the clustering results.


a) Implement a clustering algorithm (e.g., K-means or hierarchical clustering). 
Code: import random

# Sample data points (1D for simplicity)
data = [2, 4, 6, 8, 20, 22, 24, 26]

# Number of clusters
k = 2

# Initialize cluster centers randomly
centers = random.sample(data, k)

# Assign points to nearest cluster
def assign_clusters(data, centers):
    clusters = {c: [] for c in centers}
    for point in data:
        closest_center = min(centers, key=lambda c: abs(c - point))
        clusters[closest_center].append(point)
    return clusters

# Update cluster centers
def update_centers(clusters):
    return [sum(points) // len(points) for points in clusters.values()]

# Run K-means for a few iterations
for _ in range(5):
    clusters = assign_clusters(data, centers)
    centers = update_centers(clusters)

# Print final clusters
print("Final Clusters:", clusters)

OUTPUT: Final Clusters: {5: [2, 4, 6, 8], 23: [20, 22, 24, 26]}


b) Apply the clustering algorithm to a set of documents and evaluate the clustering results. 
Code: # Sample documents
documents = [
    "Apple and banana are fruits",
    "Mango and orange are sweet fruits",
    "Cars and bikes are vehicles",
    "Trucks and buses are heavy vehicles"
]

# Define keywords for clustering
cluster_keywords = {
    "fruits": ["apple", "banana", "mango", "orange", "fruits", "sweet"],
    "vehicles": ["cars", "bikes", "trucks", "buses", "vehicles", "heavy"]
}

# Assign each document to the closest cluster
clusters = {"fruits": [], "vehicles": []}
for doc in documents:
    words = doc.lower().split()
    fruit_score = sum(word in cluster_keywords["fruits"] for word in words)
    vehicle_score = sum(word in cluster_keywords["vehicles"] for word in words)
    
    if fruit_score > vehicle_score:
        clusters["fruits"].append(doc)
    else:
        clusters["vehicles"].append(doc)

# Print clustered documents
print("Clustered Documents:", clusters)

OUTPUT: Clustered Documents: {
 'fruits': [
   'Apple and banana are fruits',
   'Mango and orange are sweet fruits'
 ],
 'vehicles': [
   'Cars and bikes are vehicles',
   'Trucks and buses are heavy vehicles'
 ]
}
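The aim also asks for an evaluation of the clustering. A minimal evaluation sketch is shown below, reusing clusters and documents from above; the expected_labels mapping is an assumed gold standard added for illustration, and the score is simply the fraction of documents placed in their expected cluster:

# Assumed gold-standard assignment for the four sample documents
expected_labels = {
    "Apple and banana are fruits": "fruits",
    "Mango and orange are sweet fruits": "fruits",
    "Cars and bikes are vehicles": "vehicles",
    "Trucks and buses are heavy vehicles": "vehicles"
}

correct = sum(1 for cluster, docs in clusters.items()
              for doc in docs if expected_labels[doc] == cluster)
print(f"Clustering accuracy: {correct / len(documents):.2f}")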


PRACTICAL 7 
Aim: - Web Crawling and Indexing 

a) Develop a web crawler to fetch and index web pages. 
b) Handle challenges such as robots.txt, dynamic content, and crawling delays.

a) Develop a web crawler to fetch and index web pages. 
Code: import requests
from bs4 import BeautifulSoup

# Function to crawl and index a webpage
def crawl(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract and print the page title and links
    title = soup.title.string if soup.title else "No Title"
    links = [a['href'] for a in soup.find_all('a', href=True)]
    print(f"Title: {title}")
    print("Links Found:", links)

# Start crawling
crawl("https://example.com")  # Replace with any website

OUTPUT: Title: Example Domain
Links Found: ['https://www.iana.org/domains/example']

b) Handle challenges such as robots.txt, dynamic content, and crawling delays. 
Code: import requests
import time
from bs4 import BeautifulSoup
from urllib.robotparser import RobotFileParser

# Function to check robots.txt
def can_crawl(url):
    rp = RobotFileParser()
    rp.set_url(url + "/robots.txt")
    rp.read()
    return rp.can_fetch("*", url)

# Function to fetch and parse web page
def crawl(url):
    if not can_crawl(url):
        print("Crawling not allowed by robots.txt")
        return
    print(f"Crawling: {url}")
    
    response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})

    if response.status_code != 200:
        print("Failed to fetch page")
        return
    
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract and print page title and links
    title = soup.title.string if soup.title else "No Title"
    links = [a['href'] for a in soup.find_all('a', href=True)]

    print(f"Title: {title}")
    print("Links Found:", links)
    time.sleep(2)  # Respect crawling delays

# Start crawling
crawl("https://example.com")  # Replace with any real URL

OUTPUT: Crawling: https://example.com
Title: Example Domain
Links Found: ['https://www.iana.org/domains/example']
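The code above respects robots.txt and crawl delays, but pages whose content is generated by JavaScript will still come back mostly empty, because requests only downloads the raw HTML. One common approach, sketched here under the assumption that Selenium and a matching ChromeDriver are installed, is to render the page in a real browser first and then parse the rendered HTML:

from selenium import webdriver
from bs4 import BeautifulSoup

def crawl_dynamic(url):
    driver = webdriver.Chrome()   # requires ChromeDriver available on the PATH
    driver.get(url)               # the browser executes the page's JavaScript
    soup = BeautifulSoup(driver.page_source, "html.parser")
    driver.quit()
    print("Title:", soup.title.string if soup.title else "No Title")

crawl_dynamic("https://example.com")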


PRACTICAL 8 
Aim: - Link Analysis and PageRank 

a) Implement the PageRank algorithm to rank web pages based on link analysis. 
b) Apply the PageRank algorithm to a small web graph and analyze the results.

a) Implement the PageRank algorithm to rank web pages based on link analysis. 
Code: # Simple PageRank Implementation
web_graph = {
    'A': ['B', 'C'],
    'B': ['C'],
    'C': ['A'],
    'D': ['C']
}

# Initialize ranks
ranks = {page: 1.0 for page in web_graph}
damping_factor = 0.85
iterations = 5

# PageRank Calculation
for _ in range(iterations):
    new_ranks = {}
    for page in web_graph:
        new_rank = (1 - damping_factor) + damping_factor * sum(
            ranks[linking_page] / len(web_graph[linking_page])
            for linking_page in web_graph if page in web_graph[linking_page]
        )
        new_ranks[page] = new_rank
    ranks = new_ranks  # Update ranks

# Print final PageRank values
print("Final PageRanks:", ranks)

OUTPUT: Final PageRanks: {'A': 1.5540671874999998, 'B': 0.8104785546875, 'C': 1.4854542578125, 'D': 0.15000000000000002}
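Each iteration applies the simplified PageRank update PR(p) = (1 - d) + d * sum(PR(q) / L(q)) over all pages q that link to p, where d = 0.85 is the damping factor and L(q) is the number of outgoing links of q. In the first iteration, for example, page A is linked only from C (which has a single outgoing link), so PR(A) = 0.15 + 0.85 * (1.0 / 1) = 1.0, while page D, which has no incoming links, keeps only the baseline 1 - d = 0.15.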


b) Apply the PageRank algorithm to a small web graph and analyze the results. 
Code:  # Small web graph representation
web_graph = {
    'A': ['B', 'C'],
    'B': ['C'],
    'C': ['A'],
    'D': ['C']
}

# Initialize PageRank values
ranks = {page: 1.0 for page in web_graph}
damping_factor = 0.85
iterations = 5

# Calculate PageRank
for _ in range(iterations):
    new_ranks = {}
    for page in web_graph:
        new_rank = (1 - damping_factor) + damping_factor * sum(
            ranks[linking_page] / len(web_graph[linking_page])
            for linking_page in web_graph if page in web_graph[linking_page]
        )
        new_ranks[page] = new_rank
    ranks = new_ranks  # Update ranks

# Analyze results by ranking pages
sorted_ranks = sorted(ranks.items(), key=lambda x: x[1], reverse=True)

# Print final rankings
print("PageRank Scores:")
for page, score in sorted_ranks:
    print(f"Page {page}: {score:.4f}")


OUTPUT: PageRank Scores:
                  Page A: 1.5541
                  Page C: 1.4855
                  Page B: 0.8105
                  Page D: 0.1500
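The ranking follows the link structure: C collects rank from A, B and D (every other page links to it) and passes all of it on to A, so A and C end up with the highest scores; B receives only half of A's rank, since A splits its rank between B and C; and D, with no incoming links at all, keeps only the baseline 1 - d = 0.15.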



PRACTICAL 9 
Aim: - Learning to Rank 

a) Implement a learning to rank algorithm (e.g., RankSVM or RankBoost). 
b) Train the ranking model using labelled data and evaluate its effectiveness.


a) Implement a learning to rank algorithm (e.g., RankSVM or RankBoost). 
Code: # Sample training data (features) and relevance scores
X = [[0.2, 0.8], [0.5, 0.4], [0.9, 0.6], [0.4, 0.7]]
y = [2, 1, 3, 2]  # Higher values mean higher relevance

# Function to compute a simple ranking score
def rank_score(x):
    return sum(x)  # Simple sum of features as a ranking score

# Compute ranking scores for each sample
scores = [rank_score(x) for x in X]

# Sort by ranking score in descending order
ranked_results = sorted(zip(X, scores, y), key=lambda x: x[1], reverse=True)

# Print ranking results
print("Ranked Results (Feature, Score, Relevance):")
for item in ranked_results:
    print(item)

OUTPUT: Ranked Results (Feature, Score, Relevance):
                  ([0.9, 0.6], 1.5, 3)
                  ([0.4, 0.7], 1.1, 2)
                  ([0.2, 0.8], 1.0, 2)
                  ([0.5, 0.4], 0.9, 1)


b) Train the ranking model using labelled data and evaluate its effectiveness. 
Code: # Sample training data (features) and relevance scores
documents = [
    ("Doc1", [3, 5]),  # Features: word frequency, keyword match
    ("Doc2", [1, 8]),
    ("Doc3", [4, 2]),
    ("Doc4", [2, 6])
]
relevance = {"Doc1": 3, "Doc2": 2, "Doc3": 1, "Doc4": 2}  # Higher value = more relevant

# Function to compute a ranking score (weighted sum of features)
def rank_score(features):
    return 0.7 * features[0] + 0.3 * features[1]  # Weighted formula

# Compute ranking scores for each document
scores = [(doc, rank_score(features), relevance[doc]) for doc, features in documents]

# Sort documents based on computed ranking score
ranked_results = sorted(scores, key=lambda x: x[1], reverse=True)

# Evaluate effectiveness (check if ranking order matches relevance scores)
correct = sum(1 for i in range(len(ranked_results) - 1)
              if ranked_results[i][2] >= ranked_results[i + 1][2])
accuracy = (correct / (len(ranked_results) - 1)) * 100

# Print ranked results and accuracy
print("Ranked Results (Document, Score, Relevance):")
for item in ranked_results:
    print(item)
print(f"Ranking Accuracy: {accuracy:.2f}%")

OUTPUT: Ranked Results (Document, Score, Relevance):
('Doc1', 3.5999999999999996, 3)
('Doc3', 3.4, 1)
('Doc4', 3.1999999999999997, 2)
('Doc2', 3.0999999999999996, 2)
Ranking Accuracy: 66.67%
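The accuracy check above only compares adjacent positions in the ranked list. A stricter evaluation, sketched below reusing ranked_results from above, counts every document pair and reports the fraction whose order by score agrees with their relevance labels (pairs with equal relevance are skipped):

pairs = concordant = 0
for i in range(len(ranked_results)):
    for j in range(i + 1, len(ranked_results)):
        rel_i, rel_j = ranked_results[i][2], ranked_results[j][2]
        if rel_i == rel_j:
            continue  # skip ties in relevance
        pairs += 1
        if rel_i > rel_j:  # higher-ranked document is indeed more relevant
            concordant += 1
print(f"Pairwise ranking accuracy: {concordant / pairs:.2f}")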


PRACTICAL 10 
Aim: -Advanced Topics in Information Retrieval 

a) Implement a text summarization algorithm (e.g., extractive or abstractive). 
b) Build a question-answering system using techniques such as information extraction.

a) Implement a text summarization algorithm (e.g., extractive or abstractive). 
Code: # Sample text
text = """Text summarization is the process of condensing a document while preserving key
information.
Extractive summarization selects important sentences directly from the text, while abstractive
summarization generates new sentences.
Summarization helps improve readability and information retrieval efficiency."""

# Split text into sentences
sentences = text.split(". ")

# Compute word frequency
word_counts = {}
for sentence in sentences:
    for word in sentence.split():
        word_counts[word] = word_counts.get(word, 0) + 1

# Score sentences based on word frequency
sentence_scores = {
    sentence: sum(word_counts.get(word, 0) for word in sentence.split())
    for sentence in sentences
}

# Select the highest-scoring sentence as summary
summary = max(sentence_scores, key=sentence_scores.get)
print("Summary:", summary)

OUTPUT: Summary: Text summarization is the process of condensing a document while preserving key
information.
Extractive summarization selects important sentences directly from the text, while abstractive
summarization generates new sentences.
Summarization helps improve readability and information retrieval efficiency.
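The "summary" printed above is the entire passage: text.split(". ") never finds a period followed by a space, because every sentence in the sample ends at a line break, so the whole text is treated as a single sentence and trivially gets the highest score. A small fix, sketched below reusing text and word_counts from above, splits on a period followed by any whitespace:

import re

# Split on whitespace (including newlines) that follows a period, keeping the period
sentences = [s.strip() for s in re.split(r'(?<=\.)\s+', text) if s.strip()]
sentence_scores = {
    sentence: sum(word_counts.get(word, 0) for word in sentence.split())
    for sentence in sentences
}
print("Summary:", max(sentence_scores, key=sentence_scores.get))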


b) Build a question-answering system using techniques such as information extraction. 
Code: # Sample knowledge base (text and answers)
data = {
    "What is text summarization?": "Text summarization is the process of condensing a document while preserving key information.",
    "What is extractive summarization?": "Extractive summarization selects important sentences directly from the text.",
    "What is abstractive summarization?": "Abstractive summarization generates new sentences to convey the main idea."
}

# Function to find the best answer
def answer_question(question):
    return data.get(question, "Sorry, I don't know the answer.")

# User input
question = input("Ask a question: ")
print("Answer:", answer_question(question))

OUTPUT: Ask a question: What is extractive summarization?
Answer: Extractive summarization selects important sentences directly from the text.
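The lookup above only answers questions that match a stored key exactly. A small extension sketch, reusing the data dictionary above, falls back to the closest stored question via difflib (the same module used in Practical 3), so minor wording or casing differences can still find an answer:

import difflib

def answer_question_fuzzy(question, data):
    # Match the question against the stored questions and return the closest answer
    match = difflib.get_close_matches(question, list(data.keys()), n=1, cutoff=0.6)
    return data[match[0]] if match else "Sorry, I don't know the answer."

print(answer_question_fuzzy("what is extractive summarization", data))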

THE END