Test getPDF Text

2023-03-17 22:59:18 -04:00
parent ca7a0c026a
commit f9ee755a29
4 changed files with 51 additions and 16 deletions
--- a/handyhelper/init.py
+++ b/handyhelper/init.py
@@ -5,6 +5,7 @@ import asyncio
 import os
 import requests as req
 from bs4 import BeautifulSoup as soup
+import pdftotext

 bot = commands.Bot()

@@ -16,8 +17,6 @@ async def search(txt):

    doc = soup(resp.text, 'html.parser')

-    print(doc.title)
-    
    if 'not found' in doc.find('title').get_text():
        return None

@@ -29,6 +28,12 @@ async def search(txt):
        'pdf': pdf[2:]
    }

+async def getPDF(url):
+    """Download the PDF at ``url`` and return its extracted text.
+
+    This is a coroutine and must be awaited. The download and the text
+    extraction are both blocking calls, so they run in the event loop's
+    default executor instead of on the loop itself.
+
+    Args:
+        url: Address of the PDF document to fetch.
+
+    Returns:
+        The text of every page, with pages separated by a blank line.
+
+    Raises:
+        requests.RequestException: The download failed, timed out, or the
+            server answered with an HTTP error status.
+        pdftotext.Error: The downloaded payload could not be parsed as a PDF.
+    """
+    # Function-scope import so this block does not rely on a module-level
+    # ``import io`` being present.
+    import io
+
+    def _fetch_and_extract():
+        # Bound the wait so an unresponsive server cannot hang the command.
+        resp = req.get(url, timeout=30)
+        # Fail on 4xx/5xx here rather than feeding an error page to the parser.
+        resp.raise_for_status()
+        # pdftotext.PDF reads from a binary file-like object, not raw bytes.
+        pages = pdftotext.PDF(io.BytesIO(resp.content))
+        return "\n\n".join(pages)
+
+    loop = asyncio.get_running_loop()
+    return await loop.run_in_executor(None, _fetch_and_extract)
+
@bot.event
 async def on_ready():
    print(f'We have logged in as {bot.user}')
@@ -47,8 +52,12 @@ async def summarize(
        await interaction.followup.send(f"Unable to find article: {article}")
        return

-    await interaction.followup.send(f"""Article Found: \n{resp['ref']}
-    PDF loc: {resp['pdf']}""")
+    await interaction.followup.send(f"Article Found: {resp['ref']}
+    Parsing PDF...")
+    await interaction.followup.edit_message(f"""Article Found: {resp['ref']}
+    ```
+    {getPDF(resp['pdf'])[:1000]}
+    ```""")

 def main():
    # TODO: Import bot token from env