diff --git a/README.md b/README.md index feca577..0c0732d 100644 --- a/README.md +++ b/README.md @@ -136,13 +136,14 @@ EXTRACTOR: FILTERS: -fc, -filter-code string filter response with specified status code (-fc 403,401) + -fep, -filter-error-page filter response with ML based error page detection -fl, -filter-length string filter response with specified content length (-fl 23,33) -flc, -filter-line-count string filter response body with specified line count (-flc 423,532) -fwc, -filter-word-count string filter response body with specified word count (-fwc 423,532) -ffc, -filter-favicon string[] filter response with specified favicon hash (-mfc 1494302000) -fs, -filter-string string filter response with specified string (-fs admin) -fe, -filter-regex string filter response with specified regex (-fe admin) - -fcdn, -filter-cdn string[] filter host with specified cdn provider (incapsula, oracle, google, azure, cloudflare, cloudfront, fastly, akamai, sucuri, leaseweb) + -fcdn, -filter-cdn string[] filter host with specified cdn provider (google, leaseweb, stackpath, cloudfront, fastly) -frt, -filter-response-time string filter response with specified response time in seconds (-frt '> 1') -fdc, -filter-condition string filter response with dsl expression condition @@ -349,6 +350,28 @@ https://support.hackerone.com [301,302,301,200] [HackerOne] [Cloudflare,Ruby on https://resources.hackerone.com [301,301,404] [Sorry, no Folders found.] ``` +### Error Page Classifier and Filtering + +The Error Page Classifier and Filtering feature aims to add intelligence to the tool by enabling it to classify and filter out common error pages returned by web applications. It is an enhancement to the existing httpx capabilities and is geared towards reducing the noise in the results and helping users focus on what matters most. 
+ +```console +httpx -l urls.txt -path /v1/api -fep + + __ __ __ _ __ + / /_ / /_/ /_____ | |/ / + / __ \/ __/ __/ __ \| / + / / / / /_/ /_/ /_/ / | +/_/ /_/\__/\__/ .___/_/|_| + /_/ + + projectdiscovery.io + +[INF] Current httpx version v1.3.3 (latest) +https://scanme.sh/v1/api +``` + +Filtered error pages are stored in the predefined file `filtered_error_page.json` in JSON Lines format when the `-filter-error-page` option is used. + ### Favicon Hash diff --git a/common/errorpageclassifier/clf.gob b/common/errorpageclassifier/clf.gob new file mode 100644 index 0000000..f697862 Binary files /dev/null and b/common/errorpageclassifier/clf.gob differ diff --git a/common/errorpageclassifier/dataset.txt b/common/errorpageclassifier/dataset.txt new file mode 100644 index 0000000..1f9ee4e --- /dev/null +++ b/common/errorpageclassifier/dataset.txt @@ -0,0 +1,201 @@ +The Forum page seems to have a glitch. Our technicians are on it.||error +There was a problem with the Product Details page. Try reloading.||error +Error 500: The E-books page is experiencing a problem.||error +Unfortunately, the Video Tutorials page is down for maintenance.||error +Our Archive page is currently unavailable. We apologize for the inconvenience.||error +We're having trouble loading the Membership Details page.||error +An error occurred while trying to access the Profile Settings page.||error +Error 404: The Team page could not be found.||error +Our Project Highlights page seems to be having some technical issues.||error +We're sorry, but we can't seem to find the Donations page.||error +You've landed on our Forum page. Engage in interesting discussions.||nonerror +Welcome to the Product Details page. Learn more about our products here.||nonerror +You are now on our E-books page. Enjoy a wealth of knowledge.||nonerror +This is the Video Tutorials page. Learn with our easy-to-follow videos.||nonerror +Welcome to our Archive. Dive into our rich history.||nonerror +You're now on the Membership Details page. 
See the benefits of joining us.||nonerror +This is your Profile Settings page. Update your personal details as needed.||nonerror +You're on the Team page. Meet the people behind our organization.||nonerror +Welcome to our Project Highlights page. See what we've been up to.||nonerror +You've landed on the Donations page. Every contribution helps us do more.||nonerror +500 - Server Error This is highly unusual! Our tech team have been notified and are working on it.||error +Sorry this page is currently under maintenance.||error +Access Denied - You don't have permission to access this page.||error +This page seems to be missing 404 Error!||error +Sorry something went wrong. Please try again later.||error +We're sorry this page could not be found 404.||error +The page you requested could not be found on our site.||error +500 - Internal server error. There is a problem with the resource you are looking for and it cannot be displayed.||error +Error 401 Unauthorized: Access is denied due to invalid credentials.||error +Bad request 400. Your browser sent a request that this server could not understand.||error +This is a 404 error page||error +Sorry this page does not exist||error +Error 500: Internal Server Error||error +Oops! That page can’t be found.Try searching from the field above or go to the home page.||error +An error has occurred while processing your request. It happens to the best of us! Don't worry! There are no bugs without a fix! Let's try again! What were you looking for? If you are an adventurer search this site! If difficulties persist please contact the website administrator and report the error below. 404 Page not found||error +Whoops our bad... The page you requested was not found and we have a fine guess why. If you typed the URL directly please make sure the spelling is correct. If you clicked on a link to get here the link is outdated. What can you do? Have no fear help is near! There are many ways you can get back on track with Magento Store. 
Go back to the previous page. Use the search bar at the top of the page to search for your products. Follow these links to get you back on track! Store Home My Account||error +404 - Page not found Unfortunately the requested page could not be found.||error +PAGE NOT FOUND The page you're looking for doesn't seem to exist anymore… Return to the homepage||error +Who moved my... lemon? Oh no - looks like we can't find the page you are looking for. But you know the saying; when life gives you lemons... okay we can't find a clever way to end that sentence but we do have 2 suggestions to help you find what you were looking for: Go to the front page Or Search for a specific topic If something you need really is missing we would love it if you would let us know ❤️️||error +404—page not found||error +Apologies but there's a 503 Service Unavailable error. The server cannot handle the request.||error +Sorry you don't have access rights to this page. Error 403: Forbidden.||error +404 - Oops! The page you are looking for has been misplaced.||error +Sorry the server encountered an unexpected condition that prevented it from fulfilling the request. Error 500: Internal Server Error.||error +Whoa! The page you're looking for seems to have vanished. Error 404.||error +Sorry this page has moved or doesn't exist anymore. Error 404.||error +Sorry but your request timed out. Please try again. Error 504: Gateway Timeout.||error +We're sorry but an unknown error occurred while processing your request.||error +Error 502: Bad Gateway. The server encountered a temporary error and could not complete your request.||error +The requested resource could not be found on this server. Please verify your request and try again. Error 404.||error +This Help Center page is temporarily unavailable.||error +Privacy Policy page not found. Please try again later.||error +There seems to be an error on our Services page. 
We're working to fix it.||error +An error occurred while loading the Search Results page.||error +Category page not found. It might have been removed or relocated.||error +There was a problem loading the Cart page. Please try again.||error +Our Terms of Service page is currently down for maintenance.||error +We're sorry, but the Sitemap is not available at the moment.||error +We're having trouble loading the Reviews page.||error +An error occurred while trying to access the Partners page.||error +Settings page is currently unavailable. We apologize for the inconvenience.||error +Error 404: Resources page not found.||error +Our Press Releases page seems to be having some technical issues.||error +We're sorry, but we can't seem to find the Case Studies page.||error +There was a problem loading the Community page. Please refresh the page.||error +Error 503: The Subscriptions page is temporarily unavailable.||error +There's a problem with our Customer Support page. We're on it.||error +We're having trouble finding the Notifications page. It may have been moved.||error +There was a problem with the Feedback page. Try again later.||error +Our Transactions page is currently experiencing some issues. We appreciate your patience.||error +Your request has been successfully submitted.||nonerror +You have successfully logged out.||nonerror +Congratulations on successfully completing the course!||nonerror +The payment has been processed successfully.||nonerror +Thank you for your feedback!||nonerror +Your download will start shortly.||nonerror +Profile updated successfully.||nonerror +Thanks for contacting us! We'll get back to you as soon as possible.||nonerror +Sign-up successful. Welcome to our community!||nonerror +Your booking has been confirmed. Check your email for details.||nonerror +Welcome! Your registration was successful.||nonerror +Congratulations! You've successfully updated your profile.||nonerror +Great! Your order was placed successfully. 
We'll send you an email confirmation soon.||nonerror +Welcome back! Your login was successful.||nonerror +Success! You've added the item to your cart.||nonerror +Your request was sent successfully. We'll get back to you as soon as possible.||nonerror +Great job! Your settings have been saved.||nonerror +Your message has been submitted successfully. We appreciate your feedback.||nonerror +Thank you for subscribing to our newsletter!||nonerror +Great news! Your transaction was successful.||nonerror +Welcome to our homepage. Feel free to browse around||nonerror +Thanks for signing up! You're now a registered user.||nonerror +Your order has been placed successfully! You'll receive a confirmation email shortly||nonerror +Congratulations your account has been successfully created||nonerror +Thank you for your inquiry. We will respond to your message within 24 hours||nonerror +You've successfully added the item to your cart!||nonerror +Success! Your password has been updated||nonerror +Welcome back! You have successfully logged in||nonerror +Great job! Your profile has been updated||nonerror +Your message was sent successfully. We'll get back to you shortly||nonerror +Welcome to our website. Explore and enjoy our services.||nonerror +Thank you for visiting our About Us page. Learn more about our journey and team.||nonerror +You are now browsing our Products page. Check out our latest offerings.||nonerror +This is our Contact Us page. Feel free to reach out with any queries or feedback.||nonerror +You have reached the end of the page. Scroll up to continue browsing.||nonerror +Welcome to the News section. Stay updated with our latest announcements.||nonerror +Now viewing: Image Gallery. Enjoy a visual tour of our activities.||nonerror +You're on our FAQ page. Get answers to common questions.||nonerror +Welcome to the Blog section. Engage with our thoughts and insights.||nonerror +This is the Discussion Forum. 
Join in, ask questions, or help others.||nonerror +You're on the Login page. Enter your credentials to access your account.||nonerror +Welcome to the Sign-Up page. Join our community today.||nonerror +This is your User Dashboard. Manage your account and settings here.||nonerror +You've reached the Checkout page. Review your order and proceed to payment.||nonerror +Welcome to the Download section . Access our digital resources here.||nonerror +This is the Careers page. Explore job opportunities with us.||nonerror +You're viewing the Events Calendar. Keep track of upcoming activities.||nonerror +This is the User Profile page. Update your information as needed.||nonerror +Welcome to our Testimonials page. Read reviews and stories from our users.||nonerror +You are now on the Home page. Start exploring from here.||nonerror +Welcome to home page||nonerror +You're now on our Help Center page. Find answers to common questions here.||nonerror +Welcome to our Privacy Policy page. Learn how we protect your personal information.||nonerror +You've landed on the Services page. Explore what we have to offer.||nonerror +This is the Search Results page. Did you find what you were looking for?||nonerror +Now browsing the Category page. View all items in this category.||nonerror +You're now on the Cart page. Review your selections before proceeding to checkout.||nonerror +Welcome to our Terms of Service page. Understand our conditions for providing services.||nonerror +You are currently on our Sitemap. Navigate our website with ease.||nonerror +You are on the Reviews page. Check out what others have to say about us.||nonerror +Now viewing the Partners page. Meet the organizations we collaborate with.||nonerror +You're on the Settings page. Customize your user experience.||nonerror +This is our Resources page. Access useful documents and guides.||nonerror +You've landed on the Press Releases page. Stay updated with our latest news.||nonerror +Welcome to our Case Studies page. 
Discover our past projects and achievements.||nonerror +You're now on the Community page. Connect and interact with other members.||nonerror +You are currently on the Subscriptions page. Manage your preferences here.||nonerror +Now viewing the Customer Support page. We're here to help.||nonerror +This is the Notifications page. Keep track of your updates and alerts.||nonerror +You've landed on the Feedback page. Share your thoughts with us.||nonerror +Welcome to the Transactions page. Monitor your past and current transactions.||nonerror +500 - Server Error This is highly unusual! Our tech team have been notified and are working on it.||error +Sorry this page is currently under maintenance.||error +Access Denied - You don't have permission to access this page.||error +This page seems to be missing 404 Error!||error +Sorry something went wrong. Please try again later.||error +We're sorry this page could not be found 404.||error +The page you requested could not be found on our site.||error +500 - Internal server error. There is a problem with the resource you are looking for and it cannot be displayed.||error +Error 401 Unauthorized: Access is denied due to invalid credentials.||error +Bad request 400. Your browser sent a request that this server could not understand.||error +Your request has been successfully submitted.||nonerror +You have successfully logged out.||nonerror +Congratulations on successfully completing the course!||nonerror +The payment has been processed successfully.||nonerror +Thank you for your feedback!||nonerror +Your download will start shortly.||nonerror +Profile updated successfully.||nonerror +Thanks for contacting us! We'll get back to you as soon as possible.||nonerror +Sign-up successful. Welcome to our community!||nonerror +Your booking has been confirmed. Check your email for details.||nonerror +This is a 404 error page||error +Sorry this page does not exist||error +Error 500: Internal Server Error||error +Oops! 
That page can’t be found.Try searching from the field above or go to the home page.||error +An error has occurred while processing your request. It happens to the best of us! Don't worry! There are no bugs without a fix! Let's try again! What were you looking for? If you are an adventurer search this site! If difficulties persist please contact the website administrator and report the error below. 404 Page not found||error +Whoops our bad... The page you requested was not found and we have a fine guess why. If you typed the URL directly please make sure the spelling is correct. If you clicked on a link to get here the link is outdated. What can you do? Have no fear help is near! There are many ways you can get back on track with Magento Store. Go back to the previous page. Use the search bar at the top of the page to search for your products. Follow these links to get you back on track! Store Home | My Account||error +404 - Page not found Unfortunately the requested page could not be found.||error +PAGE NOT FOUND The page you're looking for doesn't seem to exist anymore… Return to the homepage||error +Who moved my... lemon? Oh no - looks like we can't find the page you are looking for. But you know the saying; when life gives you lemons... okay we can't find a clever way to end that sentence but we do have 2 suggestions to help you find what you were looking for: Go to the front page Or Search for a specific topic If something you need really is missing we would love it if you would let us know ❤️️||error +404—page not found||error +Apologies but there's a 503 Service Unavailable error. The server cannot handle the request.||error +Sorry you don't have access rights to this page. Error 403: Forbidden.||error +404 - Oops! The page you are looking for has been misplaced.||error +Sorry the server encountered an unexpected condition that prevented it from fulfilling the request. Error 500: Internal Server Error.||error +Whoa! 
The page you're looking for seems to have vanished. Error 404.||error +Sorry this page has moved or doesn't exist anymore. Error 404.||error +Sorry but your request timed out. Please try again. Error 504: Gateway Timeout.||error +We're sorry but an unknown error occurred while processing your request.||error +Error 502: Bad Gateway. The server encountered a temporary error and could not complete your request.||error +The requested resource could not be found on this server. Please verify your request and try again. Error 404.||error +Welcome! Your registration was successful.||nonerror +Congratulations! You've successfully updated your profile.||nonerror +Great! Your order was placed successfully. We'll send you an email confirmation soon.||nonerror +Welcome back! Your login was successful.||nonerror +Success! You've added the item to your cart.||nonerror +Your request was sent successfully. We'll get back to you as soon as possible.||nonerror +Great job! Your settings have been saved.||nonerror +Your message has been submitted successfully. We appreciate your feedback.||nonerror +Thank you for subscribing to our newsletter!||nonerror +Great news! Your transaction was successful.||nonerror +Welcome to our homepage. Feel free to browse around||nonerror +Thanks for signing up! You're now a registered user.||nonerror +Your order has been placed successfully! You'll receive a confirmation email shortly||nonerror +Congratulations your account has been successfully created||nonerror +Thank you for your inquiry. We will respond to your message within 24 hours||nonerror +You've successfully added the item to your cart!||nonerror +Success! Your password has been updated||nonerror +Welcome back! You have successfully logged in||nonerror +Great job! Your profile has been updated||nonerror +Your message was sent successfully. 
We'll get back to you shortly||nonerror
\ No newline at end of file
diff --git a/common/errorpageclassifier/errorpageclassifier.go b/common/errorpageclassifier/errorpageclassifier.go
new file mode 100644
index 0000000..d916d7c
--- /dev/null
+++ b/common/errorpageclassifier/errorpageclassifier.go
@@ -0,0 +1,54 @@
+// Package errorpageclassifier labels HTML pages with a naive Bayes classifier
+// whose serialized model (clf.gob) is embedded in the binary.
+package errorpageclassifier
+
+import (
+	_ "embed"
+
+	"github.com/jaytaylor/html2text"
+	"github.com/projectdiscovery/utils/ml/naive_bayes"
+)
+
+// classifierData is the content of clf.gob, embedded at build time.
+//
+//go:embed clf.gob
+var classifierData []byte
+
+// ErrorPageClassifier wraps the naive Bayes classifier built from the
+// embedded model data.
+type ErrorPageClassifier struct {
+	classifier *naive_bayes.NaiveBayesClassifier
+}
+
+// New returns an ErrorPageClassifier backed by the embedded model.
+// It panics if the embedded data cannot be loaded: that data is fixed at
+// build time, so a failure here is a packaging bug rather than bad input.
+func New() *ErrorPageClassifier {
+	classifier, err := naive_bayes.NewClassifierFromFileData(classifierData)
+	if err != nil {
+		panic(err)
+	}
+	return &ErrorPageClassifier{classifier: classifier}
+}
+
+// Classify converts html to plain text and returns the label the classifier
+// assigns to that text. It returns "other" when no text can be extracted,
+// which includes the case where the HTML-to-text conversion fails.
+func (n *ErrorPageClassifier) Classify(html string) string {
+	text := htmlToText(html)
+	if text == "" {
+		return "other"
+	}
+	return n.classifier.Classify(text)
+}
+
+// htmlToText returns the text content of html, or "" if the conversion
+// fails. A failed conversion must not panic: the pages presumably come from
+// scanned hosts (verify against caller), and Classify maps "" to "other".
+func htmlToText(html string) string {
+	text, err := html2text.FromString(html, html2text.Options{TextOnly: true})
+	if err != nil {
+		return ""
+	}
+	return text
+}
diff --git a/common/errorpageclassifier/errorpageclassifier_test.go b/common/errorpageclassifier/errorpageclassifier_test.go
new file mode 100644
index 0000000..35923b2
--- /dev/null
+++ b/common/errorpageclassifier/errorpageclassifier_test.go
@@ -0,0 +1,53 @@
+package errorpageclassifier
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestErrorPageClassifier(t *testing.T) {
+	t.Run("test creation of new ErrorPageClassifier", func(t *testing.T) {
+		epc := New()
+		assert.NotNil(t, epc)
+	})
+
+	t.Run("test classification non error page text", func(t *testing.T) {
+		epc := New()
+		assert.Equal(t, "nonerror", epc.Classify(`
+
+
+ +Understand our conditions for providing services.
+ + + `)) + }) + + t.Run("test classification on error page text", func(t *testing.T) { + epc := New() + assert.Equal(t, "error", epc.Classify(` + + +