diff --git a/flake.nix b/flake.nix
index 4540ab3..f0ca82e 100644
--- a/flake.nix
+++ b/flake.nix
@@ -13,6 +13,28 @@
       let
         inherit (poetry2nix.legacyPackages.${system}) mkPoetryApplication mkPoetryEnv mkPoetryPackages defaultPoetryOverrides;
         pkgs = nixpkgs.legacyPackages.${system};
+        overrides = defaultPoetryOverrides.extend
+          (self: super: {
+            nextcord = super.nextcord.overridePythonAttrs
+              (
+                old: {
+                  propagatedBuildInputs = (
+                    old.propagatedBuildInputs or []
+                  ) ++ [super.setuptools];
+                }
+              );
+            # pdftotext builds a C++ extension against poppler (poppler-cpp).
+            pdftotext = super.pdftotext.overridePythonAttrs
+              (
+                old: {
+                  buildInputs = (
+                    old.buildInputs or []
+                  ) ++ (with pkgs; [
+                    poppler
+                  ]);
+                }
+              );
+          });
       in
       rec {
         hydraJobs = pkgs.lib.optionalAttrs
@@ -49,17 +71,7 @@ DOC
             projectDir = self;
             # TODO: Upload to poetry2nix
             # https://github.com/nix-community/poetry2nix/blob/master/docs/edgecases.md
-            overrides = defaultPoetryOverrides.extend
-              (self: super: {
-                nextcord = super.nextcord.overridePythonAttrs
-                  (
-                    old: {
-                      propagatedBuildInputs = (
-                        old.propagatedBuildInputs or []
-                      ) ++ [super.setuptools];
-                    }
-                  );
-              });
+            inherit overrides;
           };
           default = self.packages.${system}.handyhelper;
         };
@@ -70,8 +82,11 @@ DOC
             poetry2nix.packages.${system}.poetry
             (mkPoetryEnv {
               projectDir = self;
+              inherit overrides;
             })
             sops
+            # For pdftotext
+            poppler
           ];
         };
       }
diff --git a/handyhelper/__init__.py b/handyhelper/__init__.py
index d1b8dc0..2903d70 100644
--- a/handyhelper/__init__.py
+++ b/handyhelper/__init__.py
@@ -5,6 +5,8 @@ import asyncio
 import os
+import io
 import requests as req
 from bs4 import BeautifulSoup as soup
+import pdftotext
 
 bot = commands.Bot()
 
@@ -16,8 +18,6 @@ async def search(txt):
 
     doc = soup(resp.text, 'html.parser')
 
-    print(doc.title)
-
     if 'not found' in doc.find('title').get_text():
         return None
 
@@ -29,6 +29,16 @@ async def search(txt):
         'pdf': pdf[2:]
     }
 
+async def getPDF(url):
+    """Download the PDF at `url` and return its text, pages separated by a blank line."""
+    # requests is blocking; run the download in a worker thread so the event
+    # loop keeps running while the file is fetched.
+    resp = await asyncio.to_thread(req.get, url)
+    resp.raise_for_status()
+    # pdftotext.PDF expects a binary file-like object, not raw bytes.
+    pages = pdftotext.PDF(io.BytesIO(resp.content))
+    return "\n\n".join(pages)
+
 @bot.event
 async def on_ready():
     print(f'We have logged in as {bot.user}')
@@ -47,8 +57,14 @@ async def summarize(
         await interaction.followup.send(f"Unable to find article: {article}")
         return
 
-    await interaction.followup.send(f"""Article Found: \n{resp['ref']}
-    PDF loc: {resp['pdf']}""")
+    # wait=True makes send() return the message, which is edited below once
+    # the PDF has been parsed.
+    status = await interaction.followup.send(
+        f"Article Found: {resp['ref']}\nParsing PDF...", wait=True)
+    # getPDF is a coroutine function: await it before slicing the text.
+    text = await getPDF(resp['pdf'])
+    await status.edit(
+        content=f"Article Found: {resp['ref']}\n```\n{text[:1000]}\n```")
 
 def main():
     # TODO: Import bot token from env
diff --git a/poetry.lock b/poetry.lock
index df4077f..3ebf68c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -677,6 +677,17 @@ files = [
 qa = ["flake8 (==3.8.3)", "mypy (==0.782)"]
 testing = ["docopt", "pytest (<6.0.0)"]
 
+[[package]]
+name = "pdftotext"
+version = "2.2.2"
+description = "Simple PDF text extraction"
+category = "main"
+optional = false
+python-versions = "*"
+files = [
+    {file = "pdftotext-2.2.2.tar.gz", hash = "sha256:2a9aa89bc62022408781b39d188fabf5a3ad1103b6630f32c4e27e395f7966ee"},
+]
+
 [[package]]
 name = "pexpect"
 version = "4.8.0"
@@ -1156,4 +1167,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "270b70e2eeff44e2b0bb07902f333dcbc45bb8c453a16769f9fd45b5a70c4993"
+content-hash = "37cd0a0c44f8d8dc60f31db3cd1c01303d4ca07f85282c6a1823ed8135714313"
diff --git a/pyproject.toml b/pyproject.toml
index be96483..6559c75 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,6 +15,7 @@ requests = "^2.28.2"
 nextcord = "^2.4.1"
 openai = "^0.27.2"
 beautifulsoup4 = "^4.11.2"
+pdftotext = "^2.2.2"
 
 [tool.poetry.group.dev.dependencies]
 ipython = "^8.11.0"