Skip to content

Commit

Permalink
deploy
Browse files Browse the repository at this point in the history
  • Loading branch information
JUNIORCO committed Sep 5, 2024
1 parent 9daf54e commit 1688cf2
Show file tree
Hide file tree
Showing 8 changed files with 1,411 additions and 18 deletions.
30 changes: 15 additions & 15 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
{
"cursor.cpp.disabledLanguages": ["markdown"],
"files.autoSave": "onFocusChange",
"files.autoSaveDelay": 100,
"editor.codeActionsOnSave": {
"source.organizeImports.biome": "explicit"
},
"editor.formatOnSave": true,
"editor.defaultFormatter": "biomejs.biome",
"editor.linkedEditing": true,
"explorer.confirmDragAndDrop": false,
"typescript.updateImportsOnFileMove.enabled": "always",
"workbench.tree.indent": 16,
"editor.tabSize": 2,
"terminal.integrated.tabs.enabled": true,
"terminal.integrated.tabs.hideCondition": "never"
"cursor.cpp.disabledLanguages": ["markdown"],
"files.autoSave": "afterDelay",
"files.autoSaveDelay": 100,
"editor.codeActionsOnSave": {
"source.organizeImports.biome": "explicit"
},
"editor.formatOnSave": true,
"editor.defaultFormatter": "biomejs.biome",
"editor.linkedEditing": true,
"explorer.confirmDragAndDrop": false,
"typescript.updateImportsOnFileMove.enabled": "always",
"workbench.tree.indent": 16,
"editor.tabSize": 2,
"terminal.integrated.tabs.enabled": true,
"terminal.integrated.tabs.hideCondition": "never"
}
34 changes: 32 additions & 2 deletions app/components/create-pyng/create-pyng-form.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@ export default function CreatePyngForm({
handleSubmit,
formState: { isSubmitting },
reset,
watch,
} = useFormContext<IFormInput>();
console.log(watch());

const disabled = isSubmitting;

Expand Down Expand Up @@ -104,6 +102,38 @@ export default function CreatePyngForm({
isDisabled={disabled}
rules={{
required: true,
// Rejects URLs that point at a blocked domain.
// Returns an error message string for a blocked URL and `true` otherwise.
validate: (value) => {
  const blockedDomains = [
    "facebook.com",
    "x.com",
    "twitter.com",
    "instagram.com",
    "linkedin.com",
    "snapchat.com",
    "tiktok.com",
    "reddit.com",
    "tumblr.com",
    "flickr.com",
    "whatsapp.com",
    "wechat.com",
    "telegram.org",
    "researchhub.com",
    "youtube.com",
    "corterix.com",
    "southwest.com",
    "ryanair.com",
  ];

  const candidate = value.toLowerCase();

  // A plain substring test would reject unrelated hosts that merely end with
  // a blocked name (e.g. "netflix.com" or "dropbox.com" contain "x.com").
  // Only match when the blocked domain is not glued to further host-label
  // characters (letters, digits, hyphen) on either side. A "." before it
  // (subdomain such as "www.") still counts as a match.
  const isBlocked = blockedDomains.some((domain) => {
    const escapedDomain = domain.replace(/\./g, "\\.");
    const domainOnLabelBoundary = new RegExp(
      `(^|[^a-z0-9-])${escapedDomain}($|[^a-z0-9-])`,
    );
    return domainOnLabelBoundary.test(candidate);
  });

  return isBlocked ? "We can't use this URL for policy reasons." : true;
},
pattern: {
value:
/[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)/,
Expand Down
1 change: 1 addition & 0 deletions app/components/create-pyng/for-input.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export default function ForInput({
...props
}: UseControllerProps<IFormInput> & { isDisabled: boolean }) {
const { field, fieldState } = useController(props);
console.log("fieldState: ", fieldState.error);

return (
<div className="w-full">
Expand Down
31 changes: 31 additions & 0 deletions app/trigger/html-to-text.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { JSDOM } from "jsdom";

/**
 * Converts an HTML document into plain text.
 *
 * Parses `html` with JSDOM, removes every `<script>` and `<style>` element,
 * reads the text content of the body and normalizes its whitespace.
 *
 * @param html - Raw HTML markup to convert.
 * @returns The body text with runs of blank lines collapsed to a single blank
 *   line, other whitespace runs collapsed to one space, and every line (and
 *   the whole result) trimmed.
 */
export function htmlToText(html: string): string {
  const dom = new JSDOM(html);

  try {
    const { document } = dom.window;

    // Script and style bodies would otherwise leak into textContent.
    document
      .querySelectorAll("script, style")
      .forEach((element) => element.remove());

    const rawText = document.body.textContent ?? "";

    return rawText
      .replace(/\n\s*\n/g, "\n\n") // Collapse blank-line runs (and whitespace-only lines) to one blank line
      .replace(/[^\S\n]+/g, " ") // Collapse other whitespace (except newlines) to a single space
      .replace(/^ +| +$/gm, "") // Remove leading/trailing spaces from each line
      .trim();
  } finally {
    // Release the resources held by the JSDOM window once the text is extracted.
    dom.window.close();
  }
}
24 changes: 23 additions & 1 deletion app/trigger/scrape.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
const scrape = async (url: string): Promise<string> => {
import { htmlToText } from "./html-to-text";

const firecrawlScrape = async (url: string): Promise<string> => {
const options = {
method: "POST",
headers: {
Expand All @@ -24,4 +26,24 @@ const scrape = async (url: string): Promise<string> => {
return scrapeResponse.data.markdown;
};

/**
 * Scrapes `url` and returns its content as text.
 *
 * Firecrawl is tried first. If it throws, the error is logged and the page is
 * fetched directly, with the HTML converted to plain text via `htmlToText`.
 *
 * @param url - Address of the page to scrape.
 * @returns The Firecrawl result, or the plain text extracted from the fetched
 *   HTML when Firecrawl fails.
 * @throws Error when the fallback request completes with a non-2xx status.
 *   Rejections from `fetch` itself (e.g. network failures) propagate unchanged.
 */
const scrape = async (url: string): Promise<string> => {
  try {
    return await firecrawlScrape(url);
  } catch (error) {
    // Best-effort: fall through to the plain fetch below.
    console.error("Error scraping url with firecrawl: ", url, error);
  }

  console.log("Scraping url with fetch...");
  const res = await fetch(url);

  // Without this check an HTTP error page (404, 500, ...) would be converted
  // to text and returned as if it were the real page content.
  if (!res.ok) {
    throw new Error(
      `Failed to fetch ${url}: ${res.status} ${res.statusText}`,
    );
  }

  const html = await res.text();
  return htmlToText(html);
};

export default scrape;
Binary file modified bun.lockb
Binary file not shown.
Loading

0 comments on commit 1688cf2

Please sign in to comment.