From 7c324c35dc2635a634d40a7f3b19e1145c103124 Mon Sep 17 00:00:00 2001
From: nex <me@nexy7574.co.uk>
Date: Tue, 5 Dec 2023 16:47:21 +0000
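Subject: [PATCH] Remove sentiment analysis

Drop the duplicated analyse_text() helpers from the Events and Other
cogs, remove the "sentiment" slash command and the "Detect Sentiment"
message command that relied on them, and delete
utils/sentiment_analysis.py.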

---
 cogs/events.py              |  15 -----
 cogs/other.py               |  48 ----------------
 utils/sentiment_analysis.py | 112 ------------------------------------
 3 files changed, 175 deletions(-)
 delete mode 100644 utils/sentiment_analysis.py

diff --git a/cogs/events.py b/cogs/events.py
index a767b30..2ead3d8 100644
--- a/cogs/events.py
+++ b/cogs/events.py
@@ -94,21 +94,6 @@ class Events(commands.Cog):
     def cog_unload(self):
         self.fetch_discord_atom_feed.cancel()
 
-    # noinspection DuplicatedCode
-    async def analyse_text(self, text: str) -> Optional[Tuple[float, float, float, float]]:
-        """Analyse text for positivity, negativity and neutrality."""
-
-        def inner():
-            try:
-                from utils.sentiment_analysis import intensity_analyser
-            except ImportError:
-                return None
-            scores = intensity_analyser.polarity_scores(text)
-            return scores["pos"], scores["neu"], scores["neg"], scores["compound"]
-
-        async with self.bot.training_lock:
-            return await self.bot.loop.run_in_executor(None, inner)
-
     @commands.Cog.listener("on_raw_reaction_add")
     async def on_raw_reaction_add(self, payload: discord.RawReactionActionEvent):
         channel: Optional[discord.TextChannel] = self.bot.get_channel(payload.channel_id)
diff --git a/cogs/other.py b/cogs/other.py
index c25043b..1e90567 100644
--- a/cogs/other.py
+++ b/cogs/other.py
@@ -349,20 +349,6 @@ class OtherCog(commands.Cog):
                     )
         return result
 
-    async def analyse_text(self, text: str) -> Optional[Tuple[float, float, float, float]]:
-        """Analyse text for positivity, negativity and neutrality."""
-
-        def inner():
-            try:
-                from utils.sentiment_analysis import intensity_analyser
-            except ImportError:
-                return None
-            scores = intensity_analyser.polarity_scores(text)
-            return scores["pos"], scores["neu"], scores["neg"], scores["compound"]
-
-        async with self.bot.training_lock:
-            return await self.bot.loop.run_in_executor(None, inner)
-
     @staticmethod
     async def get_xkcd(session: aiohttp.ClientSession, n: int) -> dict | None:
         async with session.get("https://xkcd.com/{!s}/info.0.json".format(n)) as response:
@@ -445,40 +431,6 @@ class OtherCog(commands.Cog):
         view = self.XKCDGalleryView(number)
         return await ctx.respond(embed=embed, view=view)
 
-    @commands.slash_command()
-    async def sentiment(self, ctx: discord.ApplicationContext, *, text: str):
-        """Attempts to detect a text's tone"""
-        await ctx.defer()
-        if not text:
-            return await ctx.respond("You need to provide some text to analyse.")
-        result = await self.analyse_text(text)
-        if result is None:
-            return await ctx.edit(content="Failed to load sentiment analysis module.")
-        embed = discord.Embed(title="Sentiment Analysis", color=discord.Colour.embed_background())
-        embed.add_field(name="Positive", value="{:.2%}".format(result[0]))
-        embed.add_field(name="Neutral", value="{:.2%}".format(result[2]))
-        embed.add_field(name="Negative", value="{:.2%}".format(result[1]))
-        embed.add_field(name="Compound", value="{:.2%}".format(result[3]))
-        return await ctx.edit(content=None, embed=embed)
-
-    @commands.message_command(name="Detect Sentiment")
-    async def message_sentiment(self, ctx: discord.ApplicationContext, message: discord.Message):
-        await ctx.defer()
-        text = str(message.clean_content)
-        if not text:
-            return await ctx.respond("You need to provide some text to analyse.")
-        await ctx.respond("Analyzing (this may take some time)...")
-        result = await self.analyse_text(text)
-        if result is None:
-            return await ctx.edit(content="Failed to load sentiment analysis module.")
-        embed = discord.Embed(title="Sentiment Analysis", color=discord.Colour.embed_background())
-        embed.add_field(name="Positive", value="{:.2%}".format(result[0]))
-        embed.add_field(name="Neutral", value="{:.2%}".format(result[2]))
-        embed.add_field(name="Negative", value="{:.2%}".format(result[1]))
-        embed.add_field(name="Compound", value="{:.2%}".format(result[3]))
-        embed.url = message.jump_url
-        return await ctx.edit(content=None, embed=embed)
-
     corrupt_file = discord.SlashCommandGroup(
         name="corrupt-file",
         description="Corrupts files.",
diff --git a/utils/sentiment_analysis.py b/utils/sentiment_analysis.py
deleted file mode 100644
index 7fddd78..0000000
--- a/utils/sentiment_analysis.py
+++ /dev/null
@@ -1,112 +0,0 @@
-# I have NO idea how this works
-# I copied it from the tutorial
-# However it works
-import random
-import re
-import string
-
-from nltk import FreqDist, NaiveBayesClassifier, classify
-from nltk.corpus import movie_reviews, stopwords, twitter_samples
-from nltk.sentiment.vader import SentimentIntensityAnalyzer
-from nltk.stem.wordnet import WordNetLemmatizer
-from nltk.tag import pos_tag
-
-positive_tweets = twitter_samples.strings("positive_tweets.json")
-negative_tweets = twitter_samples.strings("negative_tweets.json")
-positive_reviews = movie_reviews.categories("pos")
-negative_reviews = movie_reviews.categories("neg")
-positive_tweets += positive_reviews
-# negative_tweets += negative_reviews
-positive_tweet_tokens = twitter_samples.tokenized("positive_tweets.json")
-negative_tweet_tokens = twitter_samples.tokenized("negative_tweets.json")
-text = twitter_samples.strings("tweets.20150430-223406.json")
-tweet_tokens = twitter_samples.tokenized("positive_tweets.json")
-stop_words = stopwords.words("english")
-
-
-def lemmatize_sentence(_tokens):
-    lemmatizer = WordNetLemmatizer()
-    lemmatized_sentence = []
-    for word, tag in pos_tag(_tokens):
-        if tag.startswith("NN"):
-            pos = "n"
-        elif tag.startswith("VB"):
-            pos = "v"
-        else:
-            pos = "a"
-        lemmatized_sentence.append(lemmatizer.lemmatize(word, pos))
-    return lemmatized_sentence
-
-
-def remove_noise(_tweet_tokens, _stop_words=()):
-    cleaned_tokens = []
-
-    for token, tag in pos_tag(_tweet_tokens):
-        token = re.sub("https?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*(),]|" "%[0-9a-fA-F][0-9a-fA-F])+", "", token)
-        token = re.sub("(@[A-Za-z0-9_]+)", "", token)
-
-        if tag.startswith("NN"):
-            pos = "n"
-        elif tag.startswith("VB"):
-            pos = "v"
-        else:
-            pos = "a"
-
-        lemmatizer = WordNetLemmatizer()
-        token = lemmatizer.lemmatize(token, pos)
-
-        if len(token) > 0 and token not in string.punctuation and token.lower() not in _stop_words:
-            cleaned_tokens.append(token.lower())
-    return cleaned_tokens
-
-
-positive_cleaned_tokens_list = []
-negative_cleaned_tokens_list = []
-
-for tokens in positive_tweet_tokens:
-    positive_cleaned_tokens_list.append(remove_noise(tokens, stop_words))
-
-for tokens in negative_tweet_tokens:
-    negative_cleaned_tokens_list.append(remove_noise(tokens, stop_words))
-
-
-def get_all_words(cleaned_tokens_list):
-    for _tokens in cleaned_tokens_list:
-        for token in _tokens:
-            yield token
-
-
-all_pos_words = get_all_words(positive_cleaned_tokens_list)
-freq_dist_pos = FreqDist(all_pos_words)
-
-
-def get_tweets_for_model(cleaned_tokens_list):
-    for _tweet_tokens in cleaned_tokens_list:
-        yield {token: True for token in _tweet_tokens}
-
-
-positive_tokens_for_model = get_tweets_for_model(positive_cleaned_tokens_list)
-negative_tokens_for_model = get_tweets_for_model(negative_cleaned_tokens_list)
-
-positive_dataset = [(tweet_dict, "Positive") for tweet_dict in positive_tokens_for_model]
-
-negative_dataset = [(tweet_dict, "Negative") for tweet_dict in negative_tokens_for_model]
-
-dataset = positive_dataset + negative_dataset
-
-random.shuffle(dataset)
-
-train_data = dataset[:7000]
-test_data = dataset[7000:]
-classifier = NaiveBayesClassifier.train(train_data)
-intensity_analyser = SentimentIntensityAnalyzer()
-
-if __name__ == "__main__":
-    while True:
-        try:
-            ex = input("> ")
-        except KeyboardInterrupt:
-            break
-        else:
-            print(classifier.classify({token: True for token in remove_noise(ex.split())}))
-            print(intensity_analyser.polarity_scores(ex))