From 7c324c35dc2635a634d40a7f3b19e1145c103124 Mon Sep 17 00:00:00 2001 From: nex Date: Tue, 5 Dec 2023 16:47:21 +0000 Subject: [PATCH] Remove sentiment analysis --- cogs/events.py | 15 ----- cogs/other.py | 48 ---------------- utils/sentiment_analysis.py | 112 ------------------------------------ 3 files changed, 175 deletions(-) delete mode 100644 utils/sentiment_analysis.py diff --git a/cogs/events.py b/cogs/events.py index a767b30..2ead3d8 100644 --- a/cogs/events.py +++ b/cogs/events.py @@ -94,21 +94,6 @@ class Events(commands.Cog): def cog_unload(self): self.fetch_discord_atom_feed.cancel() - # noinspection DuplicatedCode - async def analyse_text(self, text: str) -> Optional[Tuple[float, float, float, float]]: - """Analyse text for positivity, negativity and neutrality.""" - - def inner(): - try: - from utils.sentiment_analysis import intensity_analyser - except ImportError: - return None - scores = intensity_analyser.polarity_scores(text) - return scores["pos"], scores["neu"], scores["neg"], scores["compound"] - - async with self.bot.training_lock: - return await self.bot.loop.run_in_executor(None, inner) - @commands.Cog.listener("on_raw_reaction_add") async def on_raw_reaction_add(self, payload: discord.RawReactionActionEvent): channel: Optional[discord.TextChannel] = self.bot.get_channel(payload.channel_id) diff --git a/cogs/other.py b/cogs/other.py index c25043b..1e90567 100644 --- a/cogs/other.py +++ b/cogs/other.py @@ -349,20 +349,6 @@ class OtherCog(commands.Cog): ) return result - async def analyse_text(self, text: str) -> Optional[Tuple[float, float, float, float]]: - """Analyse text for positivity, negativity and neutrality.""" - - def inner(): - try: - from utils.sentiment_analysis import intensity_analyser - except ImportError: - return None - scores = intensity_analyser.polarity_scores(text) - return scores["pos"], scores["neu"], scores["neg"], scores["compound"] - - async with self.bot.training_lock: - return await self.bot.loop.run_in_executor(None, inner) - @staticmethod async def get_xkcd(session: aiohttp.ClientSession, n: int) -> dict | None: async with session.get("https://xkcd.com/{!s}/info.0.json".format(n)) as response: @@ -445,40 +431,6 @@ class OtherCog(commands.Cog): view = self.XKCDGalleryView(number) return await ctx.respond(embed=embed, view=view) - @commands.slash_command() - async def sentiment(self, ctx: discord.ApplicationContext, *, text: str): - """Attempts to detect a text's tone""" - await ctx.defer() - if not text: - return await ctx.respond("You need to provide some text to analyse.") - result = await self.analyse_text(text) - if result is None: - return await ctx.edit(content="Failed to load sentiment analysis module.") - embed = discord.Embed(title="Sentiment Analysis", color=discord.Colour.embed_background()) - embed.add_field(name="Positive", value="{:.2%}".format(result[0])) - embed.add_field(name="Neutral", value="{:.2%}".format(result[2])) - embed.add_field(name="Negative", value="{:.2%}".format(result[1])) - embed.add_field(name="Compound", value="{:.2%}".format(result[3])) - return await ctx.edit(content=None, embed=embed) - - @commands.message_command(name="Detect Sentiment") - async def message_sentiment(self, ctx: discord.ApplicationContext, message: discord.Message): - await ctx.defer() - text = str(message.clean_content) - if not text: - return await ctx.respond("You need to provide some text to analyse.") - await ctx.respond("Analyzing (this may take some time)...") - result = await self.analyse_text(text) - if result is None: - return await ctx.edit(content="Failed to load sentiment analysis module.") - embed = discord.Embed(title="Sentiment Analysis", color=discord.Colour.embed_background()) - embed.add_field(name="Positive", value="{:.2%}".format(result[0])) - embed.add_field(name="Neutral", value="{:.2%}".format(result[2])) - embed.add_field(name="Negative", value="{:.2%}".format(result[1])) - embed.add_field(name="Compound", value="{:.2%}".format(result[3])) - embed.url = message.jump_url - return await ctx.edit(content=None, embed=embed) - corrupt_file = discord.SlashCommandGroup( name="corrupt-file", description="Corrupts files.", diff --git a/utils/sentiment_analysis.py b/utils/sentiment_analysis.py deleted file mode 100644 index 7fddd78..0000000 --- a/utils/sentiment_analysis.py +++ /dev/null @@ -1,112 +0,0 @@ -# I have NO idea how this works -# I copied it from the tutorial -# However it works -import random -import re -import string - -from nltk import FreqDist, NaiveBayesClassifier, classify -from nltk.corpus import movie_reviews, stopwords, twitter_samples -from nltk.sentiment.vader import SentimentIntensityAnalyzer -from nltk.stem.wordnet import WordNetLemmatizer -from nltk.tag import pos_tag - -positive_tweets = twitter_samples.strings("positive_tweets.json") -negative_tweets = twitter_samples.strings("negative_tweets.json") -positive_reviews = movie_reviews.categories("pos") -negative_reviews = movie_reviews.categories("neg") -positive_tweets += positive_reviews -# negative_tweets += negative_reviews -positive_tweet_tokens = twitter_samples.tokenized("positive_tweets.json") -negative_tweet_tokens = twitter_samples.tokenized("negative_tweets.json") -text = twitter_samples.strings("tweets.20150430-223406.json") -tweet_tokens = twitter_samples.tokenized("positive_tweets.json") -stop_words = stopwords.words("english") - - -def lemmatize_sentence(_tokens): - lemmatizer = WordNetLemmatizer() - lemmatized_sentence = [] - for word, tag in pos_tag(_tokens): - if tag.startswith("NN"): - pos = "n" - elif tag.startswith("VB"): - pos = "v" - else: - pos = "a" - lemmatized_sentence.append(lemmatizer.lemmatize(word, pos)) - return lemmatized_sentence - - -def remove_noise(_tweet_tokens, _stop_words=()): - cleaned_tokens = [] - - for token, tag in pos_tag(_tweet_tokens): - token = re.sub("https?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*(),]|" "%[0-9a-fA-F][0-9a-fA-F])+", "", token) - token = re.sub("(@[A-Za-z0-9_]+)", "", token) - - if tag.startswith("NN"): - pos = "n" - elif tag.startswith("VB"): - pos = "v" - else: - pos = "a" - - lemmatizer = WordNetLemmatizer() - token = lemmatizer.lemmatize(token, pos) - - if len(token) > 0 and token not in string.punctuation and token.lower() not in _stop_words: - cleaned_tokens.append(token.lower()) - return cleaned_tokens - - -positive_cleaned_tokens_list = [] -negative_cleaned_tokens_list = [] - -for tokens in positive_tweet_tokens: - positive_cleaned_tokens_list.append(remove_noise(tokens, stop_words)) - -for tokens in negative_tweet_tokens: - negative_cleaned_tokens_list.append(remove_noise(tokens, stop_words)) - - -def get_all_words(cleaned_tokens_list): - for _tokens in cleaned_tokens_list: - for token in _tokens: - yield token - - -all_pos_words = get_all_words(positive_cleaned_tokens_list) -freq_dist_pos = FreqDist(all_pos_words) - - -def get_tweets_for_model(cleaned_tokens_list): - for _tweet_tokens in cleaned_tokens_list: - yield {token: True for token in _tweet_tokens} - - -positive_tokens_for_model = get_tweets_for_model(positive_cleaned_tokens_list) -negative_tokens_for_model = get_tweets_for_model(negative_cleaned_tokens_list) - -positive_dataset = [(tweet_dict, "Positive") for tweet_dict in positive_tokens_for_model] - -negative_dataset = [(tweet_dict, "Negative") for tweet_dict in negative_tokens_for_model] - -dataset = positive_dataset + negative_dataset - -random.shuffle(dataset) - -train_data = dataset[:7000] -test_data = dataset[7000:] -classifier = NaiveBayesClassifier.train(train_data) -intensity_analyser = SentimentIntensityAnalyzer() - -if __name__ == "__main__": - while True: - try: - ex = input("> ") - except KeyboardInterrupt: - break - else: - print(classifier.classify({token: True for token in remove_noise(ex.split())})) - print(intensity_analyser.polarity_scores(ex))