Test getPDF Text

This commit is contained in:
2023-03-17 22:59:18 -04:00
parent ca7a0c026a
commit f9ee755a29
4 changed files with 51 additions and 16 deletions

View File

@@ -5,6 +5,7 @@ import asyncio
import os
import requests as req
from bs4 import BeautifulSoup as soup
import pdftotext
bot = commands.Bot()
@@ -16,8 +17,6 @@ async def search(txt):
doc = soup(resp.text, 'html.parser')
print(doc.title)
if 'not found' in doc.find('title').get_text():
return None
@@ -29,6 +28,12 @@ async def search(txt):
'pdf': pdf[2:]
}
async def getPDF(url):
    """Download the PDF at ``url`` and return its extracted text.

    Args:
        url: Address of the PDF document to fetch.

    Returns:
        The text of every page joined into one string, with consecutive
        pages separated by a blank line.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond in time.
    """
    # Local import keeps this block self-contained.
    import io

    def _fetch_and_parse():
        # A timeout stops a stalled server from hanging the command forever.
        resp = req.get(url, timeout=30)
        # Fail loudly on 4xx/5xx instead of feeding an error page to the parser.
        resp.raise_for_status()
        # pdftotext.PDF reads from a binary file-like object, not raw bytes.
        pages = pdftotext.PDF(io.BytesIO(resp.content))
        return "\n\n".join(pages)

    # Both the HTTP request and the PDF parsing are blocking calls; run them
    # in the default executor so the bot's event loop stays responsive.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _fetch_and_parse)
@bot.event
async def on_ready():
print(f'We have logged in as {bot.user}')
@@ -47,8 +52,12 @@ async def summarize(
await interaction.followup.send(f"Unable to find article: {article}")
return
await interaction.followup.send(f"""Article Found: \n{resp['ref']}
PDF loc: {resp['pdf']}""")
await interaction.followup.send(f"Article Found: {resp['ref']}
Parsing PDF...")
await interaction.followup.edit_message(f"""Article Found: {resp['ref']}
```
{getPDF(resp['pdf'])[:1000]}
```""")
def main():
# TODO: Import bot token from env