Test getPDF Text
This commit is contained in:
36
flake.nix
36
flake.nix
@@ -13,6 +13,27 @@
|
||||
let
|
||||
inherit (poetry2nix.legacyPackages.${system}) mkPoetryApplication mkPoetryEnv mkPoetryPackages defaultPoetryOverrides;
|
||||
pkgs = nixpkgs.legacyPackages.${system};
|
||||
overrides = defaultPoetryOverrides.extend
|
||||
(self: super: {
|
||||
nextcord = super.nextcord.overridePythonAttrs
|
||||
(
|
||||
old: {
|
||||
propagatedBuildInputs = (
|
||||
old.propagatedBuildInputs or []
|
||||
) ++ [super.setuptools];
|
||||
}
|
||||
);
|
||||
pdftotext = super.pdftotext.overridePythonAttrs
|
||||
(
|
||||
old: {
|
||||
buildInputs = (
|
||||
old.buildInputs or []
|
||||
) ++ (with pkgs; [
|
||||
poppler
|
||||
]);
|
||||
}
|
||||
);
|
||||
});
|
||||
in rec {
|
||||
|
||||
hydraJobs = pkgs.lib.optionalAttrs
|
||||
@@ -49,17 +70,7 @@ DOC
|
||||
projectDir = self;
|
||||
# TODO: Upload to poetry2nix
|
||||
# https://github.com/nix-community/poetry2nix/blob/master/docs/edgecases.md
|
||||
overrides = defaultPoetryOverrides.extend
|
||||
(self: super: {
|
||||
nextcord = super.nextcord.overridePythonAttrs
|
||||
(
|
||||
old: {
|
||||
propagatedBuildInputs = (
|
||||
old.propagatedBuildInputs or []
|
||||
) ++ [super.setuptools];
|
||||
}
|
||||
);
|
||||
});
|
||||
inherit overrides;
|
||||
};
|
||||
default = self.packages.${system}.handyhelper;
|
||||
};
|
||||
@@ -70,8 +81,11 @@ DOC
|
||||
poetry2nix.packages.${system}.poetry
|
||||
(mkPoetryEnv {
|
||||
projectDir = self;
|
||||
inherit overrides;
|
||||
})
|
||||
sops
|
||||
# For pdftotext
|
||||
poppler
|
||||
];
|
||||
};
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import asyncio
|
||||
import os
|
||||
import requests as req
|
||||
from bs4 import BeautifulSoup as soup
|
||||
import pdftotext
|
||||
|
||||
bot = commands.Bot()
|
||||
|
||||
@@ -16,8 +17,6 @@ async def search(txt):
|
||||
|
||||
doc = soup(resp.text, 'html.parser')
|
||||
|
||||
print(doc.title)
|
||||
|
||||
if 'not found' in doc.find('title').get_text():
|
||||
return None
|
||||
|
||||
@@ -29,6 +28,12 @@ async def search(txt):
|
||||
'pdf': pdf[2:]
|
||||
}
|
||||
|
||||
async def getPDF(url):
    """Download the PDF at *url* and return its extracted text.

    The blocking HTTP request and the PDF parsing both run in a worker
    thread so the bot's event loop is not stalled while they complete.

    Args:
        url: Address of the PDF document to download.

    Returns:
        The text of every page, with pages separated by a blank line.

    Raises:
        requests.RequestException: If the download fails, times out, or
            the server answers with an HTTP error status.
        pdftotext.Error: If the downloaded payload is not a readable PDF.
    """
    # Imported locally so this function stays self-contained regardless
    # of what the module's import block provides.
    import asyncio
    import io

    def _fetch_and_extract():
        # A timeout keeps a stalled server from hanging the worker forever.
        resp = req.get(url, timeout=30)
        # Fail early on 4xx/5xx instead of feeding an error page to the parser.
        resp.raise_for_status()
        # pdftotext.PDF expects a binary file-like object (it calls .read()),
        # not raw bytes, so wrap the downloaded content in a BytesIO.
        pages = pdftotext.PDF(io.BytesIO(resp.content))
        return "\n\n".join(pages)

    return await asyncio.to_thread(_fetch_and_extract)
|
||||
|
||||
@bot.event
|
||||
async def on_ready():
|
||||
print(f'We have logged in as {bot.user}')
|
||||
@@ -47,8 +52,12 @@ async def summarize(
|
||||
await interaction.followup.send(f"Unable to find article: {article}")
|
||||
return
|
||||
|
||||
await interaction.followup.send(f"""Article Found: \n{resp['ref']}
|
||||
PDF loc: {resp['pdf']}""")
|
||||
await interaction.followup.send(f"Article Found: {resp['ref']}
|
||||
Parsing PDF...")
|
||||
await interaction.followup.edit_message(f"""Article Found: {resp['ref']}
|
||||
```
|
||||
{getPDF(resp['pdf'])[:1000]}
|
||||
```""")
|
||||
|
||||
def main():
|
||||
# TODO: Import bot token from env
|
||||
|
||||
13
poetry.lock
generated
13
poetry.lock
generated
@@ -677,6 +677,17 @@ files = [
|
||||
qa = ["flake8 (==3.8.3)", "mypy (==0.782)"]
|
||||
testing = ["docopt", "pytest (<6.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "pdftotext"
|
||||
version = "2.2.2"
|
||||
description = "Simple PDF text extraction"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "pdftotext-2.2.2.tar.gz", hash = "sha256:2a9aa89bc62022408781b39d188fabf5a3ad1103b6630f32c4e27e395f7966ee"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pexpect"
|
||||
version = "4.8.0"
|
||||
@@ -1156,4 +1167,4 @@ multidict = ">=4.0"
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "270b70e2eeff44e2b0bb07902f333dcbc45bb8c453a16769f9fd45b5a70c4993"
|
||||
content-hash = "37cd0a0c44f8d8dc60f31db3cd1c01303d4ca07f85282c6a1823ed8135714313"
|
||||
|
||||
@@ -15,6 +15,7 @@ requests = "^2.28.2"
|
||||
nextcord = "^2.4.1"
|
||||
openai = "^0.27.2"
|
||||
beautifulsoup4 = "^4.11.2"
|
||||
pdftotext = "^2.2.2"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
ipython = "^8.11.0"
|
||||
|
||||
Reference in New Issue
Block a user