Test getPDF Text
This commit is contained in:
36
flake.nix
36
flake.nix
@@ -13,6 +13,27 @@
|
|||||||
let
|
let
|
||||||
inherit (poetry2nix.legacyPackages.${system}) mkPoetryApplication mkPoetryEnv mkPoetryPackages defaultPoetryOverrides;
|
inherit (poetry2nix.legacyPackages.${system}) mkPoetryApplication mkPoetryEnv mkPoetryPackages defaultPoetryOverrides;
|
||||||
pkgs = nixpkgs.legacyPackages.${system};
|
pkgs = nixpkgs.legacyPackages.${system};
|
||||||
|
overrides = defaultPoetryOverrides.extend
|
||||||
|
(self: super: {
|
||||||
|
nextcord = super.nextcord.overridePythonAttrs
|
||||||
|
(
|
||||||
|
old: {
|
||||||
|
propagatedBuildInputs = (
|
||||||
|
old.propagatedBuildInputs or []
|
||||||
|
) ++ [super.setuptools];
|
||||||
|
}
|
||||||
|
);
|
||||||
|
pdftotext = super.pdftotext.overridePythonAttrs
|
||||||
|
(
|
||||||
|
old: {
|
||||||
|
buildInputs = (
|
||||||
|
old.buildInputs or []
|
||||||
|
) ++ (with pkgs; [
|
||||||
|
poppler
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
);
|
||||||
|
});
|
||||||
in rec {
|
in rec {
|
||||||
|
|
||||||
hydraJobs = pkgs.lib.optionalAttrs
|
hydraJobs = pkgs.lib.optionalAttrs
|
||||||
@@ -49,17 +70,7 @@ DOC
|
|||||||
projectDir = self;
|
projectDir = self;
|
||||||
# TODO: Upload to poetry2nix
|
# TODO: Upload to poetry2nix
|
||||||
# https://github.com/nix-community/poetry2nix/blob/master/docs/edgecases.md
|
# https://github.com/nix-community/poetry2nix/blob/master/docs/edgecases.md
|
||||||
overrides = defaultPoetryOverrides.extend
|
inherit overrides;
|
||||||
(self: super: {
|
|
||||||
nextcord = super.nextcord.overridePythonAttrs
|
|
||||||
(
|
|
||||||
old: {
|
|
||||||
propagatedBuildInputs = (
|
|
||||||
old.propagatedBuildInputs or []
|
|
||||||
) ++ [super.setuptools];
|
|
||||||
}
|
|
||||||
);
|
|
||||||
});
|
|
||||||
};
|
};
|
||||||
default = self.packages.${system}.handyhelper;
|
default = self.packages.${system}.handyhelper;
|
||||||
};
|
};
|
||||||
@@ -70,8 +81,11 @@ DOC
|
|||||||
poetry2nix.packages.${system}.poetry
|
poetry2nix.packages.${system}.poetry
|
||||||
(mkPoetryEnv {
|
(mkPoetryEnv {
|
||||||
projectDir = self;
|
projectDir = self;
|
||||||
|
inherit overrides;
|
||||||
})
|
})
|
||||||
sops
|
sops
|
||||||
|
# For pdftotext
|
||||||
|
poppler
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import asyncio
|
|||||||
import os
|
import os
|
||||||
import requests as req
|
import requests as req
|
||||||
from bs4 import BeautifulSoup as soup
|
from bs4 import BeautifulSoup as soup
|
||||||
|
import pdftotext
|
||||||
|
|
||||||
bot = commands.Bot()
|
bot = commands.Bot()
|
||||||
|
|
||||||
@@ -16,8 +17,6 @@ async def search(txt):
|
|||||||
|
|
||||||
doc = soup(resp.text, 'html.parser')
|
doc = soup(resp.text, 'html.parser')
|
||||||
|
|
||||||
print(doc.title)
|
|
||||||
|
|
||||||
if 'not found' in doc.find('title').get_text():
|
if 'not found' in doc.find('title').get_text():
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -29,6 +28,12 @@ async def search(txt):
|
|||||||
'pdf': pdf[2:]
|
'pdf': pdf[2:]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async def getPDF(url):
    """Download the PDF at *url* and return its extracted text.

    The text of each page is joined with a blank line between pages.

    Args:
        url: Address of the PDF, passed unchanged to ``requests.get``.
            NOTE(review): the lookup helper in this file appears to store
            the link with its leading ``//`` stripped; ``requests`` needs
            an explicit scheme -- confirm what callers pass in.

    Returns:
        A single ``str`` containing the text of every page.

    Raises:
        requests.RequestException: on network failure, timeout, or an
            HTTP error status.
        pdftotext.Error: if the downloaded payload cannot be parsed as PDF.
    """
    # Function-scope imports keep this block self-contained.
    import asyncio
    import io

    def _fetch_and_extract():
        # A timeout stops a stalled download from hanging forever.
        resp = req.get(url, timeout=30)
        # Fail loudly on 4xx/5xx instead of feeding an error page to the
        # PDF parser.
        resp.raise_for_status()
        # pdftotext.PDF reads from a binary file object, not raw bytes,
        # so wrap the downloaded payload in an in-memory buffer.
        pages = pdftotext.PDF(io.BytesIO(resp.content))
        return "\n\n".join(pages)

    # requests and pdftotext are both blocking; run them in a worker
    # thread so the bot's event loop keeps servicing other events.
    return await asyncio.to_thread(_fetch_and_extract)
|
||||||
|
|
||||||
@bot.event
|
@bot.event
|
||||||
async def on_ready():
|
async def on_ready():
|
||||||
print(f'We have logged in as {bot.user}')
|
print(f'We have logged in as {bot.user}')
|
||||||
@@ -47,8 +52,12 @@ async def summarize(
|
|||||||
await interaction.followup.send(f"Unable to find article: {article}")
|
await interaction.followup.send(f"Unable to find article: {article}")
|
||||||
return
|
return
|
||||||
|
|
||||||
await interaction.followup.send(f"""Article Found: \n{resp['ref']}
|
await interaction.followup.send(f"Article Found: {resp['ref']}
|
||||||
PDF loc: {resp['pdf']}""")
|
Parsing PDF...")
|
||||||
|
await interaction.followup.edit_message(f"""Article Found: {resp['ref']}
|
||||||
|
```
|
||||||
|
{getPDF(resp['pdf'])[:1000]}
|
||||||
|
```""")
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# TODO: Import bot token from env
|
# TODO: Import bot token from env
|
||||||
|
|||||||
13
poetry.lock
generated
13
poetry.lock
generated
@@ -677,6 +677,17 @@ files = [
|
|||||||
qa = ["flake8 (==3.8.3)", "mypy (==0.782)"]
|
qa = ["flake8 (==3.8.3)", "mypy (==0.782)"]
|
||||||
testing = ["docopt", "pytest (<6.0.0)"]
|
testing = ["docopt", "pytest (<6.0.0)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pdftotext"
|
||||||
|
version = "2.2.2"
|
||||||
|
description = "Simple PDF text extraction"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "pdftotext-2.2.2.tar.gz", hash = "sha256:2a9aa89bc62022408781b39d188fabf5a3ad1103b6630f32c4e27e395f7966ee"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pexpect"
|
name = "pexpect"
|
||||||
version = "4.8.0"
|
version = "4.8.0"
|
||||||
@@ -1156,4 +1167,4 @@ multidict = ">=4.0"
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.10"
|
python-versions = "^3.10"
|
||||||
content-hash = "270b70e2eeff44e2b0bb07902f333dcbc45bb8c453a16769f9fd45b5a70c4993"
|
content-hash = "37cd0a0c44f8d8dc60f31db3cd1c01303d4ca07f85282c6a1823ed8135714313"
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ requests = "^2.28.2"
|
|||||||
nextcord = "^2.4.1"
|
nextcord = "^2.4.1"
|
||||||
openai = "^0.27.2"
|
openai = "^0.27.2"
|
||||||
beautifulsoup4 = "^4.11.2"
|
beautifulsoup4 = "^4.11.2"
|
||||||
|
pdftotext = "^2.2.2"
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
ipython = "^8.11.0"
|
ipython = "^8.11.0"
|
||||||
|
|||||||
Reference in New Issue
Block a user