pax_global_header00006660000000000000000000000064145101122740014507gustar00rootroot0000000000000052 comment=45c78f1b4de6e30ce18a7338fd6d200d60be1a05 nahsra-antisamy-45c78f1/000077500000000000000000000000001451011227400151515ustar00rootroot00000000000000nahsra-antisamy-45c78f1/.github/000077500000000000000000000000001451011227400165115ustar00rootroot00000000000000nahsra-antisamy-45c78f1/.github/dependabot.yml000066400000000000000000000006621451011227400213450ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "daily" - package-ecosystem: "maven" directory: "/" schedule: interval: "daily" open-pull-requests-limit: 10 labels: - dependencies ignore: - dependency-name: xml-apis:xml-apis versions: - "> 1.4.01" - dependency-name: commons-io:commons-io versions: - "> 2.6" nahsra-antisamy-45c78f1/.github/workflows/000077500000000000000000000000001451011227400205465ustar00rootroot00000000000000nahsra-antisamy-45c78f1/.github/workflows/codeql-analysis.yml000066400000000000000000000043751451011227400243720ustar00rootroot00000000000000# For most projects, this workflow file will not need changing; you simply need # to commit it to your repository. # # You may wish to alter this file to override the set of languages analyzed, # or to provide custom queries or build logic. name: "CodeQL" on: push: branches: [main] paths: - 'src/**' - '!src/test/**' pull_request: # The branches below must be a subset of the branches above branches: [main] paths: - 'src/**' - '!src/test/**' schedule: - cron: '0 9 * * 5' jobs: analyze: name: Analyze runs-on: ubuntu-latest strategy: fail-fast: false matrix: language: ['java', 'javascript'] steps: - name: Checkout repository uses: actions/checkout@v4 with: # Change from default script: Perform a deep clone so origin/main can be found. fetch-depth: 0 # If this run was triggered by a pull request event, then checkout # the head of the pull request instead of the merge commit. 
- run: git checkout HEAD^2 if: ${{ github.event_name == 'pull_request' }} # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v2 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. # queries: ./path/to/local/query, your-org/your-repo/queries@main # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild uses: github/codeql-action/autobuild@v2 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines # and modify them (or add more) to build your code if your project # uses a compiled language #- run: | # make bootstrap # make release - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v2 nahsra-antisamy-45c78f1/.github/workflows/maven.yml000066400000000000000000000006021451011227400223750ustar00rootroot00000000000000# Run mvn test to ensure all tests pass name: Java CI with Maven on: [push, pull_request] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - name: Set up JDK 1.8 uses: actions/setup-java@v3 with: java-version: 8 distribution: zulu - name: Run unit tests run: mvn test nahsra-antisamy-45c78f1/.github/workflows/shiftleft-analysis.yml000066400000000000000000000025401451011227400251030ustar00rootroot00000000000000# This workflow integrates Scan with GitHub's code scanning feature # Scan is a free open-source security tool for modern DevOps teams from ShiftLeft # Visit https://slscan.io/en/latest/integrations/code-scan for help # Force a new scan by SL name: SL Scan # This section configures the 
trigger for the workflow. Feel free to customize depending on your convention on: push jobs: Scan-Build: # Scan runs on ubuntu, mac and windows runs-on: ubuntu-latest permissions: security-events: write steps: - uses: actions/checkout@v4 # Instructions # 1. Setup JDK, Node.js, Python etc depending on your project type # 2. Compile or build the project before invoking scan # Example: mvn compile, or npm install or pip install goes here # 3. Invoke Scan with the github token. Leave the workspace empty to use relative url - name: Perform Scan uses: ShiftLeftSecurity/scan-action@master env: WORKSPACE: "" GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} SCAN_AUTO_BUILD: true with: output: reports # Scan auto-detects the languages in your project. To override uncomment the below variable and set the type # type: credscan,java # type: python - name: Upload report uses: github/codeql-action/upload-sarif@v2 with: sarif_file: reports nahsra-antisamy-45c78f1/.gitignore000066400000000000000000000001221451011227400171340ustar00rootroot00000000000000target/ .project .settings .DS_Store .classpath .java-version .idea antisamy.iml nahsra-antisamy-45c78f1/DevStyleXml.prefs000066400000000000000000000001601451011227400204270ustar00rootroot00000000000000eclipse.preferences.version=1 indentationChar=space indentationSize=4 lineWidth=140 formatCommentJoinLines=true nahsra-antisamy-45c78f1/LICENSE000066400000000000000000000027271451011227400161660ustar00rootroot00000000000000Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. nahsra-antisamy-45c78f1/README.md000066400000000000000000000246501451011227400164370ustar00rootroot00000000000000# AntiSamy A library for performing fast, configurable cleansing of HTML coming from untrusted sources. Supports Java 8+. Another way of saying that could be: It's an API that helps you make sure that clients don't supply malicious cargo code in the HTML they supply for their profile, comments, etc., that get persisted on the server. The term "malicious code" in regards to web applications usually mean "JavaScript." Mostly, Cascading Stylesheets are only considered malicious when they invoke JavaScript. However, there are many situations where "normal" HTML and CSS can be used in a malicious manner. ## IMPORTANT! 
- API breaking changes in 1.7.0 Throughout the development of the 1.6.x series, we have identified and deprecated a number of features and APIs. All of these deprecated items have been removed in the 1.7.0 release. These changes were all tracked in ticket: https://github.com/nahsra/antisamy/issues/195. Each of the changes are described below: `CssHandler` had 2 constructors which dropped the `LinkedList embeddedStyleSheets` parameter. Both constructors now create an empty internal `LinkedList` and the method `getImportedStylesheetsURIList()` can be used to get a reference to it, if needed. This feature is rarely used, and in fact direct invocation of these constructors is also rare, so this change is unlikely to affect most users of AntiSamy. When used, normally an empty list is passed in as this parameter value and that list is never used again. * The `CssHandler(Policy, LinkedList, List, ResourceBundle)` signature was dropped * It was replaced with: `CssHandler(Policy, List, ResourceBundle)` * The `CssHandler(Policy, LinkedList, List, String, ResourceBundle)` signature was dropped * It was replaced with: `CssHandler(Policy, List, ResourceBundle, String)`. NOTE: The order of the last 2 parameters to this method was reversed. * Support for XHTML was dropped. AntiSamy now only supports HTML. As we believe this was a rarely used feature, we don't expect this to affect many AntiSamy users. * XML Schema validation is now required on AntiSamy policy files and cannot be disabled. You must make your policy file schema compliant in order to use it with AntiSamy. * The policy directive `noopenerAndNoreferrerAnchors` is now ON by default. If it is disabled, AntiSamy issues a nag, encouraging you to enable it. ## Deprecating support for external stylesheets The AntiSamy team has decided that supporting the ability to allow embedded remote CSS is dangerous and so we are deprecating this feature and it will be removed in a future release. 
It is expected that there are very few, if any, users of this feature. We have added a log WARNing if this feature is invoked. If you are using it, please disable/remove this feature by switching to the primary `CssScanner` constructor that does not enable this feature. ## How to Use ### 1. Import the dependency First, add the dependency from Maven: ```xml <dependency> <groupId>org.owasp.antisamy</groupId> <artifactId>antisamy</artifactId> <version>LATEST_VERSION</version> </dependency> ``` ### 2. Choosing a base policy file Chances are that your site’s use case for AntiSamy is at least roughly comparable to one of the predefined policy files. They each represent a "typical" scenario for allowing users to provide HTML (and possibly CSS) formatting information. Let’s look into the different policy files: 1) antisamy-slashdot.xml Slashdot is a techie news site that allows users to respond anonymously to news posts with very limited HTML markup. Now, Slashdot is not only one of the coolest sites around, it’s also one that’s been subject to many different successful attacks. The rules for Slashdot are fairly strict: users can only submit the following HTML tags and no CSS: `<b>`, `<u>`, `<i>`, `<a>`, `<blockquote>`. Accordingly, we’ve built a policy file that allows fairly similar functionality. All text-formatting tags that operate directly on the font, color, or emphasis have been allowed. 2) antisamy-ebay.xml eBay is the most popular online auction site in the universe, as far as we can tell. It is a public site so anyone is allowed to post listings with rich HTML content. It’s not surprising, given the attractiveness of eBay as a target, that it has been subject to a few complex XSS attacks. Listings are allowed to contain much more rich content than, say, Slashdot -- so its attack surface is considerably larger. 3) antisamy-myspace.xml MySpace was, at the time this project was born, the most popular social networking site. Users were allowed to submit pretty much all the HTML and CSS they wanted -- as long as it didn’t contain JavaScript. MySpace was using a word blacklist to validate users’ HTML, which is why they were subject to the infamous Samy worm. The Samy worm, which used fragmentation attacks combined with a word that should have been blacklisted (eval) - was the inspiration for this project. 4) antisamy-anythinggoes.xml We don’t know of a possible use case for this policy file. If you wanted to allow every single valid HTML and CSS element (but without JavaScript or blatant CSS-related phishing attacks), you can use this policy file. Not even MySpace was this crazy. However, it does serve as a good reference because it contains base rules for every element, so you can use it as a knowledge base when tailoring the other policy files. ### Logging AntiSamy now includes the slf4j-simple library for its logging, but AntiSamy users can import and use an alternate slf4j compatible logging library if they prefer. They can also then exclude slf4j-simple if they want to. WARNING: AntiSamy's use of slf4j-simple, without any configuration file, logs messages in a buffered manner to standard output. 
As such, some or all of these log messages may get lost if an `Exception`, such as a `PolicyException`, is thrown. This can likely be rectified by configuring slf4j-simple to log to standard error instead, or by using an alternate slf4j logger that does so. ### 3. Tailoring the policy file You may want to deploy AntiSamy in a default configuration, but it’s equally likely that a site may want to have strict, business-driven rules for what users can allow. The discussion that decides the tailoring should also consider attack surface - which grows in relative proportion to the policy file. Example policies can be adapted and tested based on the requirements for each tag. The supported tag actions that can be specified are: - `filter`: remove tags, but keep content. - `validate`: keep content as long as it passes rules. - `remove`: remove tag and contents. - `truncate`: remove tag attributes and all child tags except for its text content if any. - `encode`: similar to filter but it encodes the tag for HTML to preserve it as raw text and its children are moved up one level in the hierarchy. ### 4. Calling the AntiSamy API Using AntiSamy is easy. Here is an example of invoking AntiSamy with a policy file: ``` import org.owasp.validator.html.*; Policy policy = Policy.getInstance(POLICY_FILE_LOCATION); AntiSamy as = new AntiSamy(); CleanResults cr = as.scan(dirtyInput, policy); MyUserDAO.storeUserProfile(cr.getCleanHTML()); // some custom function ``` There are a few ways to create a `Policy` object. 
The `getInstance()` method can take any of the following: * a `String` filename * a `File` object * an `InputStream` * `Policy` files can also be referenced by filename by passing a second argument to the `AntiSamy#scan()` method as the following examples show: ``` AntiSamy as = new AntiSamy(); CleanResults cr = as.scan(dirtyInput, policyFilePath); ``` Finally, policy files can also be referenced by `File` objects directly in the second parameter: ``` AntiSamy as = new AntiSamy(); CleanResults cr = as.scan(dirtyInput, new File(policyFilePath)); ``` ### 5. Analyzing CleanResults The `CleanResults` object provides a lot of useful stuff. * `getCleanHTML()` - the clean, safe HTML output * `getCleanXMLDocumentFragment()` - the clean, safe `XMLDocumentFragment` which is reflected in `getCleanHTML()` * `getErrorMessages()` - a list of String error messages -- *if this returns an empty list that does not mean there were no attacks!* * `getNumberOfErrors()` - the number of error messages -- *Again, 0 does not mean the input was safe!* * `getScanTime()` - returns the scan time in seconds __Important Note__: There has been much confusion about the `getErrorMessages()` method. An empty list from `getErrorMessages()` (or a zero from `getNumberOfErrors()`) does not answer the question "is this safe input?" in the affirmative. You must always use the sanitized input and there is no way to be sure the input passed in had no attacks. The serialization and deserialization process that is critical to the effectiveness of the sanitizer is purposefully lossy and will filter out attacks via a number of attack vectors. Unfortunately, one of the tradeoffs of this strategy is that AntiSamy doesn't always know in retrospect that an attack was seen. Thus, the `getErrorMessages()` and `getNumberOfErrors()` APIs are there to help users understand whether their well-intentioned input meets the requirements of the system, not to help a developer detect if an attack was present. 
## Other Documentation Additional documentation is available on this GitHub project's wiki page: https://github.com/nahsra/antisamy/wiki and the OWASP AntiSamy Project Page: https://owasp.org/www-project-antisamy/ ## Contributing to AntiSamy ### Find an Issue? If you have found a bug, then create an issue in the AntiSamy repo: https://github.com/nahsra/antisamy/issues ### Find a Vulnerability? If you have found a vulnerability in AntiSamy, first search the issues list (see above) to see if it has already been reported. If it has not, then please contact Dave Wichers (dave.wichers at owasp.org) directly. Please do not report vulnerabilities via GitHub issues as we wish to keep our users secure while a patch is implemented and deployed. If you wish to be acknowledged for finding the vulnerability, then please follow this process. More detail is available in the file: [SECURITY.md](https://github.com/nahsra/antisamy/blob/main/SECURITY.md). ## How to Build You can build and test from source pretty easily: ``` $ git clone https://github.com/nahsra/antisamy $ cd antisamy $ mvn package ``` ## License Released under the [BSD-3-Clause](https://opensource.org/licenses/BSD-3-Clause) license as specified here: [LICENSE](https://github.com/nahsra/antisamy/blob/main/LICENSE). nahsra-antisamy-45c78f1/SECURITY.md000066400000000000000000000061341451011227400167460ustar00rootroot00000000000000# Security Policy ## Reporting a Vulnerability If you believe that you have found a vulnerability in AntiSamy, first please search the GitHub issues list (for both open and closed issues) to see if it has already been reported. If it has not, then please contact Dave Wichers (dave.wichers at owasp.org) _directly_. Please do **not** report any suspected vulnerabilities via GitHub issues as we wish to keep our users secure while a patch is implemented and deployed. 
This is because if this is reported as a GitHub issue, it is more or less equivalent to dropping a 0-day on all applications using AntiSamy. Instead, we encourage responsible disclosure. If you wish to be acknowledged for finding the vulnerability, then please follow this process. One of the project leaders will try to contact you within 1-2 business days. If you eventually wish to have it published as a CVE, we will also work with you to ensure that you are given proper credit with MITRE and NIST. Even if you do not wish to report the vulnerability as a CVE, we will acknowledge you when we create a GitHub issue (once the issue is patched). If possible, provide a working proof-of-concept or at least minimally describe how it can be exploited in sufficient detail that the AntiSamy development team can understand what needs to be done to fix it. ## Security Bulletins These are the known CVEs reported for AntiSamy: * AntiSamy CVE #1 - CVE-2016-10006: XSS Bypass in AntiSamy before v1.5.5 - https://www.cvedetails.com/cve/CVE-2016-10006 * AntiSamy CVE #2 - CVE-2017-14735: XSS via HTML5 Entities in AntiSamy before v1.5.7 - https://www.cvedetails.com/cve/CVE-2017-14735 * AntiSamy CVE #3 - CVE-2021-35043: XSS via HTML attributes using `&#00058` as replacement for the `:` character before v1.6.4 - https://www.cvedetails.com/cve/CVE-2021-35043 * AntiSamy CVE #4 - CVE-2022-28367: AntiSamy before 1.6.6 allows XSS via HTML tag smuggling on STYLE content. - https://www.cvedetails.com/cve/CVE-2022-28367. NOTE: This release only included a PARTIAL fix. * AntiSamy CVE #5 - CVE-2022-29577: AntiSamy before 1.6.7 allows XSS via HTML tag smuggling on STYLE content. - https://www.cvedetails.com/cve/CVE-2022-29577. This is the complete fix to the previous CVE. * AntiSamy CVE #6 - CVE-2023-43643: AntiSamy before 1.7.4 subject to mXSS when preserving comments. 
- https://www.cvedetails.com/cve/CVE-2023-43643 CVEs in AntiSamy dependencies: * AntiSamy prior to 1.6.6 used the old CyberNeko HTML library v1.9.22, which is subject to https://www.cvedetails.com/cve/CVE-2022-28366 and no longer maintained. AntiSamy 1.6.6 upgraded to an active fork of CyberNeko called HtmlUnit-Neko which fixed this CVE in v2.27 of that library. AntiSamy 1.6.6 upgraded to version 2.60.0 of HtmlUnit-Neko. * AntiSamy 1.6.8 upgraded to HtmlUnit-Neko v2.61.0 because v2.60.0 is subject to https://www.cvedetails.com/cve/CVE-2022-29546 * AntiSamy 1.7.3 upgraded to HtmlUnit-Neko v3.1.0 because all versions prior to 3.0.0 are subject to https://www.cvedetails.com/cve/CVE-2023-26119 * AntiSamy 1.7.4 upgraded to batik-css v1.17 because batik-css:1.16 is subject to https://www.cvedetails.com/cve/CVE-2022-44729 nahsra-antisamy-45c78f1/pom.xml000066400000000000000000000714121451011227400164730ustar00rootroot00000000000000 4.0.0 org.owasp.antisamy antisamy jar 1.7.4 ossrh https://oss.sonatype.org/content/repositories/snapshots ossrh https://oss.sonatype.org/service/local/staging/deploy/maven2/ OWASP AntiSamy A library for performing fast, configurable cleansing of HTML coming from untrusted sources. 
https://github.com/nahsra/antisamy 2010 nahsra Arshan Dabirsiaghi arshan.dabirsiaghi@gmail.com OWASP Foundation https://owasp.org/ BSD 3 https://opensource.org/licenses/BSD-3-Clause scm:git:git@github.com:nahsra/antisamy.git scm:git:git@github.com:nahsra/antisamy.git scm:git:git@github.com:nahsra/antisamy.git 2.0.0-M7 true UTF-8 2023-10-06T21:08:34Z 1.8 1.12.0 2.0.9 4.7.3.6 4.7.3 release false org.htmlunit neko-htmlunit 3.6.0 org.apache.httpcomponents.client5 httpclient5 5.2.1 org.slf4j slf4j-api org.apache.httpcomponents.core5 httpcore5 org.apache.httpcomponents.core5 httpcore5 5.2.3 org.apache.xmlgraphics batik-css 1.17 commons-io commons-io commons-logging commons-logging commons-io commons-io 2.14.0 org.slf4j slf4j-api ${version.slf4j} org.slf4j jcl-over-slf4j ${version.slf4j} test org.slf4j slf4j-simple ${version.slf4j} test xerces xercesImpl 2.12.2 xml-apis xml-apis 1.4.01 xml-apis xml-apis-ext 1.3.04 com.github.spotbugs spotbugs-annotations ${version.spotbugs} true commons-codec commons-codec 1.16.0 test junit junit 4.13.2 test org.hamcrest hamcrest-core org.hamcrest hamcrest 2.2 test org.apache.maven.plugins maven-assembly-plugin 3.6.0 org.apache.maven.plugins maven-dependency-plugin 3.6.0 commons-io:commons-io org.slf4j:jcl-over-slf4j org.slf4j:slf4j-simple org.apache.maven.plugins maven-javadoc-plugin 3.6.0 true org.apache.maven.plugins maven-release-plugin 3.0.1 org.apache.maven.plugins maven-clean-plugin 3.3.1 org.apache.maven.plugins maven-compiler-plugin 3.11.0 ${project.java.target} ${project.java.target} ${project.java.target} ${project.java.target} -Xlint:unchecked org.apache.maven.plugins maven-deploy-plugin 3.1.1 org.apache.maven.plugins maven-enforcer-plugin 3.4.1 org.codehaus.mojo extra-enforcer-rules 1.7.0 org.codehaus.mojo animal-sniffer-enforcer-rule 1.23 enforce-bytecode-version enforce ${project.java.target} true test Dependencies shouldn't require Java 9+. 
3.3.9 true enforce-jdk-version enforce ${project.java.target} AntiSamy source code shouldn't require Java 9+. check-java7API-signatures compile enforce org.codehaus.mojo.signature java17 1.0 org.apache.maven.plugins maven-gpg-plugin 3.1.0 sign-artifacts verify sign org.apache.maven.plugins maven-install-plugin 3.1.1 org.apache.maven.plugins maven-jar-plugin 3.3.0 org.owasp.validator.html.AntiSamy org.owasp.validator.html true org.apache.maven.plugins maven-javadoc-plugin attach-javadocs package jar org.apache.maven.plugins maven-jxr-plugin 3.3.0 org.apache.maven.plugins maven-pmd-plugin 3.21.0 org.apache.maven.plugins maven-project-info-reports-plugin 3.4.5 org.apache.maven.plugins maven-resources-plugin 3.3.1 org.apache.maven.plugins maven-site-plugin 4.0.0-M9 org.apache.maven.skins maven-fluido-skin ${fluido.version} org.apache.maven.plugins maven-source-plugin 3.3.0 attach-sources package jar-no-fork org.apache.maven.plugins maven-surefire-plugin 3.1.2 org.codehaus.mojo versions-maven-plugin 2.16.1 org.cyclonedx cyclonedx-maven-plugin 2.7.9 package makeBom org.jacoco jacoco-maven-plugin 0.8.10 prepare-agent prepare-agent report test report jacoco-check test check false CLASS METHOD MISSEDCOUNT 0 com.diffplug.spotless spotless-maven-plugin 2.30.0 origin/main *.md target/**/*.* false true 4 **/*.xml target/**/*.* XML DevStyleXml.prefs 1.7 com.google.googlejavaformat:google-java-format spotless-apply compile apply com.github.spotbugs spotbugs-maven-plugin ${version.spotbugs.maven} com.github.spotbugs spotbugs ${version.spotbugs} com.h3xstream.findsecbugs findsecbugs-plugin ${version.findsecbugs} com.github.spotbugs spotbugs-maven-plugin com.h3xstream.findsecbugs findsecbugs-plugin ${version.findsecbugs} Max false src/test/spotbugsFilterFile.xml org.apache.maven.plugins maven-javadoc-plugin org.apache.maven.plugins maven-jxr-plugin org.apache.maven.plugins maven-pmd-plugin ${project.java.target} utf-8 org.apache.maven.plugins maven-project-info-reports-plugin index 
dependency-convergence false org.codehaus.mojo versions-maven-plugin dependency-updates-report plugin-updates-report property-updates-report org.jacoco jacoco-maven-plugin report nahsra-antisamy-45c78f1/src/000077500000000000000000000000001451011227400157405ustar00rootroot00000000000000nahsra-antisamy-45c78f1/src/main/000077500000000000000000000000001451011227400166645ustar00rootroot00000000000000nahsra-antisamy-45c78f1/src/main/java/000077500000000000000000000000001451011227400176055ustar00rootroot00000000000000nahsra-antisamy-45c78f1/src/main/java/org/000077500000000000000000000000001451011227400203745ustar00rootroot00000000000000nahsra-antisamy-45c78f1/src/main/java/org/owasp/000077500000000000000000000000001451011227400215255ustar00rootroot00000000000000nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/000077500000000000000000000000001451011227400235125ustar00rootroot00000000000000nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/css/000077500000000000000000000000001451011227400243025ustar00rootroot00000000000000nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/css/CssHandler.java000066400000000000000000000427011451011227400271770ustar00rootroot00000000000000/* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of OWASP nor the names of its contributors may be used to * endorse or promote products derived from this software without specific * prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.css; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collection; import java.util.LinkedList; import java.util.List; import java.util.ResourceBundle; import org.owasp.validator.html.InternalPolicy; import org.owasp.validator.html.Policy; import org.owasp.validator.html.ScanException; import org.owasp.validator.html.util.ErrorMessageUtil; import org.owasp.validator.html.util.HTMLEntityEncoder; import org.w3c.css.sac.CSSException; import org.w3c.css.sac.DocumentHandler; import org.w3c.css.sac.InputSource; import org.w3c.css.sac.LexicalUnit; import org.w3c.css.sac.SACMediaList; import org.w3c.css.sac.Selector; import org.w3c.css.sac.SelectorList; /** * A implementation of a SAC DocumentHandler for CSS validation. The appropriate validation method * is called whenever the handler is invoked by the parser. The handler also builds a clean CSS * document as the original CSS is scanned. * *

NOTE: keeping state in this class is not ideal as handler style parsing a la SAX should * generally be event driven. However, there is not a fully implemented "DOM" equivalent to CSS at * this time. Java has a StyleSheet class that could accomplish this "DOM" like behavior but it has * yet to be fully implemented. * * @see javax.swing.text.html.StyleSheet * @author Jason Li */ public class CssHandler implements DocumentHandler { /** The style sheet as it is being built by the handler */ private StringBuffer styleSheet = new StringBuffer(); /** The validator to use when CSS constituents are encountered */ private final CssValidator validator; /** The policy file to use in validation */ private final InternalPolicy policy; /** The error messages */ private final Collection errorMessages; /** The error message bundle to pull from. */ private ResourceBundle messages; /** A queue of imported stylesheets; used to track imported stylesheets */ private final LinkedList importedStyleSheets; /** The tag currently being examined (if any); used for inline stylesheet error messages */ private final String tagName; /** * Indicates whether we are scanning a stylesheet or an inline declaration. true if this is an * inline declaration; false otherwise */ private final boolean isInline; /** * Indicates whether the handler is currently parsing the contents between an open selector tag * and an close selector tag */ private boolean selectorOpen = false; /** * Constructs a handler for stylesheets using the given policy. The List of embedded stylesheets * produced by this constructor is now available via the getImportedStylesheetsURIList() method. * This constructor to be used when there is no tag name associated with this inline style. 
* * @param policy the policy to use * @param errorMessages the List of error messages to add error messages too if there are errors * @param messages the error message bundle to pull from */ public CssHandler(Policy policy, List errorMessages, ResourceBundle messages) { this(policy, errorMessages, messages, null); } /** * Constructs a handler for stylesheets using the given policy. The List of embedded stylesheets * produced by this constructor is available via the getImportedStylesheetsURIList() method. * * @param policy the policy to use * @param errorMessages the List of error messages to add error messages too if there are errors * @param messages the error message bundle to pull from * @param tagName the tag name associated with this inline style */ public CssHandler( Policy policy, List errorMessages, ResourceBundle messages, String tagName) { assert policy instanceof InternalPolicy : policy.getClass(); this.policy = (InternalPolicy) policy; this.errorMessages = errorMessages; this.messages = messages; this.validator = new CssValidator(policy); // Create a queue of all style sheets that need to be validated to // account for any sheets that may be imported by the current CSS this.importedStyleSheets = new LinkedList(); this.tagName = tagName; this.isInline = (tagName != null); } /** * Returns the cleaned stylesheet. * * @return the cleaned stylesheet. */ public String getCleanStylesheet() { // Always ensure results contain most recent generation of stylesheet return styleSheet.toString(); } /** * Returns a list of imported stylesheets from the main parsed stylesheet. * * @return the import stylesheet URI list. */ public LinkedList getImportedStylesheetsURIList() { return importedStyleSheets; } /** Empties the stylesheet buffer. */ public void emptyStyleSheet() { styleSheet.delete(0, styleSheet.length()); } /** * Returns the error messages generated during parsing, if any. 
Note: the lack of error messages * does not mean the HTML input being sanitized can be considered safe. * * @return the error messages generated during parsing */ public Collection getErrorMessages() { return new ArrayList(errorMessages); } /* * (non-Javadoc) * * @see org.w3c.css.sac.DocumentHandler#comment(java.lang.String) */ public void comment(String text) throws CSSException { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_COMMENT_REMOVED, new Object[] {HTMLEntityEncoder.htmlEntityEncode(text)})); } /* * (non-Javadoc) * * @see org.w3c.css.sac.DocumentHandler#ignorableAtRule(java.lang.String) */ public void ignorableAtRule(String atRule) throws CSSException { // this method is called when the parser hits an unrecognized @-rule. Like the page/media/font // declarations, this is CSS2+ stuff if (tagName != null) { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_CSS_TAG_RULE_NOTFOUND, new Object[] { HTMLEntityEncoder.htmlEntityEncode(tagName), HTMLEntityEncoder.htmlEntityEncode(atRule) })); } else { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_STYLESHEET_RULE_NOTFOUND, new Object[] {HTMLEntityEncoder.htmlEntityEncode(atRule)})); } } /* * (non-Javadoc) * * @see org.w3c.css.sac.DocumentHandler#importStyle(java.lang.String, * org.w3c.css.sac.SACMediaList, java.lang.String) */ public void importStyle(String uri, SACMediaList media, String defaultNamespaceURI) throws CSSException { /* The ability to import remote styles is deprecated and will be removed in a future * release. When that is done this method will simply generate the following error * message and return. 
*/ if (!policy.isEmbedStyleSheets()) { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_CSS_IMPORT_DISABLED, new Object[] {})); return; } try { // check for non-nullness (validate after canonicalization) if (uri == null) { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_CSS_IMPORT_URL_INVALID, new Object[] {})); return; } URI importedStyleSheet = new URI(uri); // canonicalize the URI importedStyleSheet.normalize(); // validate the URL if (!policy.getCommonRegularExpressions("offsiteURL").matches(importedStyleSheet.toString()) && !policy .getCommonRegularExpressions("onsiteURL") .matches(importedStyleSheet.toString())) { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_CSS_IMPORT_URL_INVALID, new Object[] {HTMLEntityEncoder.htmlEntityEncode(uri)})); return; } if (!importedStyleSheet.isAbsolute()) { // we have no concept of relative reference for free form text as an end user can't know // where the corresponding free form will end up if (tagName != null) { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_CSS_TAG_RELATIVE, new Object[] { HTMLEntityEncoder.htmlEntityEncode(tagName), HTMLEntityEncoder.htmlEntityEncode(uri) })); } else { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_STYLESHEET_RELATIVE, new Object[] {HTMLEntityEncoder.htmlEntityEncode(uri)})); } return; } importedStyleSheets.add(importedStyleSheet); } catch (URISyntaxException use) { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_CSS_IMPORT_URL_INVALID, new Object[] {HTMLEntityEncoder.htmlEntityEncode(uri)})); } } /* * (non-Javadoc) * * @see org.w3c.css.sac.DocumentHandler#namespaceDeclaration(java.lang.String, * java.lang.String) */ public void namespaceDeclaration(String prefix, String uri) throws CSSException { // CSS3 - Namespace declaration - ignore for now } /* * (non-Javadoc) * * @see 
org.w3c.css.sac.DocumentHandler#startDocument(org.w3c.css.sac.InputSource) */ public void startDocument(InputSource arg0) throws CSSException { // no-op } /* * (non-Javadoc) * * @see org.w3c.css.sac.DocumentHandler#endDocument(org.w3c.css.sac.InputSource) */ public void endDocument(InputSource source) throws CSSException { // no-op } /* * (non-Javadoc) * * @see org.w3c.css.sac.DocumentHandler#startFontFace() */ public void startFontFace() throws CSSException { // CSS2 Font Face declaration - ignore this for now } /* * (non-Javadoc) * * @see org.w3c.css.sac.DocumentHandler#endFontFace() */ public void endFontFace() throws CSSException { // CSS2 Font Face declaration - ignore this for now } /* * (non-Javadoc) * * @see org.w3c.css.sac.DocumentHandler#startMedia(org.w3c.css.sac.SACMediaList) */ public void startMedia(SACMediaList media) throws CSSException { // CSS2 Media declaration - ignore this for now } /* * (non-Javadoc) * * @see org.w3c.css.sac.DocumentHandler#endMedia(org.w3c.css.sac.SACMediaList) */ public void endMedia(SACMediaList media) throws CSSException { // CSS2 Media declaration - ignore this for now } /* * (non-Javadoc) * * @see org.w3c.css.sac.DocumentHandler#startPage(java.lang.String, * java.lang.String) */ public void startPage(String name, String pseudoPage) throws CSSException { // CSS2 Page declaration - ignore this for now } /* * (non-Javadoc) * * @see org.w3c.css.sac.DocumentHandler#endPage(java.lang.String, * java.lang.String) */ public void endPage(String name, String pseudoPage) throws CSSException { // CSS2 Page declaration - ignore this for now } /* * (non-Javadoc) * * @see org.w3c.css.sac.DocumentHandler#startSelector(org.w3c.css.sac.SelectorList) */ public void startSelector(SelectorList selectors) throws CSSException { // keep track of number of valid selectors from this rule int selectorCount = 0; // check each selector from this rule for (int i = 0; i < selectors.getLength(); i++) { Selector selector = selectors.item(i); if (selector 
!= null) { String selectorName = selector.toString(); boolean isValidSelector = false; try { isValidSelector = validator.isValidSelector(selectorName, selector); } catch (ScanException se) { if (tagName != null) { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_CSS_TAG_SELECTOR_NOTFOUND, new Object[] { HTMLEntityEncoder.htmlEntityEncode(tagName), HTMLEntityEncoder.htmlEntityEncode(selector.toString()) })); } else { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_STYLESHEET_SELECTOR_NOTFOUND, new Object[] {HTMLEntityEncoder.htmlEntityEncode(selector.toString())})); } } // if the selector is valid, add to list if (isValidSelector) { if (selectorCount > 0) { styleSheet.append(','); styleSheet.append(' '); } styleSheet.append(selectorName); selectorCount++; } else if (tagName != null) { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_CSS_TAG_SELECTOR_DISALLOWED, new Object[] { HTMLEntityEncoder.htmlEntityEncode(tagName), HTMLEntityEncoder.htmlEntityEncode(selector.toString()) })); } else { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_STYLESHEET_SELECTOR_DISALLOWED, new Object[] {HTMLEntityEncoder.htmlEntityEncode(selector.toString())})); } } } // if and only if there were selectors that were valid, append appropriate open brace and set // state to within selector if (selectorCount > 0) { styleSheet.append(' '); styleSheet.append('{'); styleSheet.append('\n'); selectorOpen = true; } } /* * (non-Javadoc) * * @see org.w3c.css.sac.DocumentHandler#endSelector(org.w3c.css.sac.SelectorList) */ public void endSelector(SelectorList selectors) throws CSSException { // if we are in a state within a selector, close brace if (selectorOpen) { styleSheet.append('}'); styleSheet.append('\n'); } // reset state selectorOpen = false; } /* * (non-Javadoc) * * @see org.w3c.css.sac.DocumentHandler#property(java.lang.String, * org.w3c.css.sac.LexicalUnit, 
boolean) */ public void property(String name, LexicalUnit value, boolean important) throws CSSException { // only bother validating and building if we are either inline or within a selector tag if (!selectorOpen && !isInline) { return; } // validate the property if (validator.isValidProperty(name, value)) { if (!isInline) { styleSheet.append('\t'); } styleSheet.append(name); styleSheet.append(':'); // append all values while (value != null) { styleSheet.append(' '); styleSheet.append(validator.lexicalValueToString(value)); value = value.getNextLexicalUnit(); } if (important) { styleSheet.append(" !important"); } styleSheet.append(';'); if (!isInline) { styleSheet.append('\n'); } } else if (tagName != null) { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_CSS_TAG_PROPERTY_INVALID, new Object[] { HTMLEntityEncoder.htmlEntityEncode(tagName), HTMLEntityEncoder.htmlEntityEncode(name), HTMLEntityEncoder.htmlEntityEncode(validator.lexicalValueToString(value)) })); } else { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_STYLESHEET_PROPERTY_INVALID, new Object[] { HTMLEntityEncoder.htmlEntityEncode(name), HTMLEntityEncoder.htmlEntityEncode(validator.lexicalValueToString(value)) })); } } } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/css/CssParser.java000066400000000000000000000076631451011227400270660ustar00rootroot00000000000000/* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Sebastián Passaro * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 
* - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of OWASP nor the names of its contributors may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.css; import org.apache.batik.css.parser.LexicalUnits; import org.w3c.css.sac.CSSException; import org.w3c.css.sac.CSSParseException; import org.w3c.css.sac.LexicalUnit; public class CssParser extends org.apache.batik.css.parser.Parser { /** * This implementation is a workaround to solve leading dash errors on property names. * @see https://issues.apache.org/jira/browse/BATIK-1112 * @param inSheet Specifies if the style to parse is inside a sheet or the sheet itself. 
* @throws CSSException Thrown if there are parsing errors in CSS */ protected void parseStyleDeclaration(final boolean inSheet) throws CSSException { boolean leadingDash = false; for (;;) { switch (current) { case LexicalUnits.EOF: if (inSheet) { throw createCSSParseException("eof"); } return; case LexicalUnits.RIGHT_CURLY_BRACE: if (!inSheet) { throw createCSSParseException("eof.expected"); } nextIgnoreSpaces(); return; case LexicalUnits.SEMI_COLON: nextIgnoreSpaces(); continue; case LexicalUnits.MINUS: leadingDash = true; next(); break; default: throw createCSSParseException("identifier"); case LexicalUnits.IDENTIFIER: } final String name = (leadingDash ? "-" : "") + scanner.getStringValue(); leadingDash = false; if (nextIgnoreSpaces() != LexicalUnits.COLON) { throw createCSSParseException("colon"); } nextIgnoreSpaces(); LexicalUnit exp = null; try { exp = parseExpression(false); } catch (final CSSParseException e) { reportError(e); } if (exp != null) { boolean important = false; if (current == LexicalUnits.IMPORTANT_SYMBOL) { important = true; nextIgnoreSpaces(); } documentHandler.property(name, exp, important); } } } } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/css/CssScanner.java000066400000000000000000000362241451011227400272160ustar00rootroot00000000000000/* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 
* - Neither the name of OWASP nor the names of its contributors may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.css; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.StringReader; import java.net.URI; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import java.util.ResourceBundle; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.batik.css.parser.ParseException; import org.apache.batik.css.parser.Parser; import org.apache.hc.client5.http.ClientProtocolException; import org.apache.hc.client5.http.classic.HttpClient; import org.apache.hc.client5.http.classic.methods.HttpGet; import org.apache.hc.client5.http.config.RequestConfig; import org.apache.hc.client5.http.impl.classic.HttpClientBuilder; import org.apache.hc.core5.http.ClassicHttpResponse; import org.apache.hc.core5.http.HttpEntity; import org.apache.hc.core5.http.HttpStatus; import org.apache.hc.core5.http.io.HttpClientResponseHandler; import 
org.apache.hc.core5.http.io.entity.EntityUtils; import org.apache.hc.core5.util.Timeout; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.InternalPolicy; import org.owasp.validator.html.Policy; import org.owasp.validator.html.ScanException; import org.owasp.validator.html.util.ErrorMessageUtil; import org.owasp.validator.html.util.HTMLEntityEncoder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.css.sac.InputSource; /** * Encapsulates the parsing and validation of a CSS stylesheet or inline declaration. To make use of * this class, instantiate the scanner with the desired policy and call either * scanInlineSheet() or scanStyleSheet as appropriate. * * @see #scanInlineStyle(String, String, int) * @see #scanStyleSheet(String, int) * @author Jason Li */ public class CssScanner { protected static final Logger logger = LoggerFactory.getLogger(CssScanner.class); protected static final Timeout DEFAULT_TIMEOUT = Timeout.ofMilliseconds(1000); private static final String CDATA = "^\\s*\\s*$"; /** The parser to be used in any scanning */ private final Parser parser = new CssParser(); /** The policy file to be used in any scanning */ private final InternalPolicy policy; /** The message bundled to pull error messages from. */ private final ResourceBundle messages; /** The message bundled to pull error messages from. */ private final boolean shouldParseImportedStyles; private static final Pattern cdataMatchPattern = Pattern.compile(CDATA, Pattern.DOTALL); /** * Constructs a scanner based on the given AntiSamy policy. This version of the constructor * defaults shouldParseImportedStyles to false. Look at the other constructor for a description of * that parameter. 
* * @param policy the policy to follow when scanning * @param messages the error message bundle to pull from */ public CssScanner(InternalPolicy policy, ResourceBundle messages) { this(policy, messages, false); } /** * Constructs a scanner based on the given AntiSamy policy. * * @param policy the policy to follow when scanning * @param messages the error message bundle to pull from * @param shouldParseImportedStyles Flag to indicate if styles within @import directives should be * imported and parsed in the resulting style sheet. This boolean determines if URLs should be * recognized when parsing styles (i.e., to fetch them or ignore them). * @deprecated Support for remote import of styles will be removed as that is a dangerous * practice. The simpler constructor should be used which defaults to disallow such imports. */ @Deprecated public CssScanner( InternalPolicy policy, ResourceBundle messages, boolean shouldParseImportedStyles) { this.policy = policy; this.messages = messages; this.shouldParseImportedStyles = shouldParseImportedStyles; if (shouldParseImportedStyles) { logger.warn( "Allowing CSS imports from external URLs is a dangerous practice. It is recommended you " + "disable this feature. Support for this feature in AntiSamy is deprecated and will " + "be removed in a future release."); } } /** * Scans the contents of a full stylesheet (ex. a file based stylesheet or the complete stylesheet * contents as declared within <style> tags) * * @param taintedCss a String containing the contents of the CSS stylesheet to * validate * @param sizeLimit the limit on the total size in bytes of any imported stylesheets * @return a CleanResuts object containing the results of the scan * @throws ScanException if an error occurs during scanning */ public CleanResults scanStyleSheet(String taintedCss, int sizeLimit) throws ScanException { long startOfScan = System.currentTimeMillis(); List errorMessages = new ArrayList(); /* Check to see if the text starts with (\s)*(\s)*. 
*/ Matcher m = cdataMatchPattern.matcher(taintedCss); boolean isCdata = m.matches(); if (isCdata) { taintedCss = m.group(1); } CssHandler handler = new CssHandler(policy, errorMessages, messages); // parse the stylesheet parser.setDocumentHandler(handler); try { // parse the style declaration // note this does not count against the size limit because it // should already have been counted by the caller since it was // embedded in the HTML parser.parseStyleSheet(new InputSource(new StringReader(taintedCss))); } catch (IOException | ParseException e) { /* * ParseException, from batik, is unfortunately a RuntimeException. */ throw new ScanException(e); } String cleaned = getCleanStylesheetWithImports(sizeLimit, errorMessages, handler); if (isCdata) { cleaned = ""; } return new CleanResults(startOfScan, cleaned, null, errorMessages); } /** * Scans the contents of an inline style declaration (ex. in the style attribute of an HTML tag) * and validates the style sheet according to this CssScanner's policy file. 
* * @param taintedCss a String containing the contents of the CSS stylesheet to * validate * @param tagName the name of the tag for which this inline style was declared * @param sizeLimit the limit on the total size in bites of any imported stylesheets * @return a CleanResuts object containing the results of the scan * @throws ScanException if an error occurs during scanning */ public CleanResults scanInlineStyle(String taintedCss, String tagName, int sizeLimit) throws ScanException { long startOfScan = System.currentTimeMillis(); List errorMessages = new ArrayList(); CssHandler handler = new CssHandler(policy, errorMessages, messages, tagName); parser.setDocumentHandler(handler); try { // parse the inline style declaration // note this does not count against the size limit because it // should already have been counted by the caller since it was // embedded in the HTML parser.parseStyleDeclaration(taintedCss); } catch (IOException ioe) { throw new ScanException(ioe); } String cleaned = getCleanStylesheetWithImports(sizeLimit, errorMessages, handler); return new CleanResults(startOfScan, cleaned, null, errorMessages); } private String getCleanStylesheetWithImports( int sizeLimit, List errorMessages, CssHandler handler) throws ScanException { String cleaned = handler.getCleanStylesheet(); if (shouldParseImportedStyles) { handler.emptyStyleSheet(); parseImportedStylesheets(handler.getImportedStylesheetsURIList(), errorMessages, sizeLimit); // If there are styles to import they must be added to the beginning cleaned = handler.getCleanStylesheet() + cleaned; } return cleaned; } /** * Parses through a LinkedList of imported stylesheet URIs, this method parses * through those stylesheets and validates them * * @param stylesheets the LinkedList of stylesheet URIs to parse * @param errorMessages the list of error messages to append to * @param sizeLimit the limit on the total size in bites of any imported stylesheets * @throws ScanException if an error occurs during 
scanning * @deprecated Support for remote import of styles will be removed as that is dangerous. */ @Deprecated private void parseImportedStylesheets( LinkedList stylesheets, List errorMessages, int sizeLimit) throws ScanException { // if stylesheets were imported by the inline style declaration, // continue parsing the nested styles. Note this only happens // if CSS importing was enabled in the policy file if (!stylesheets.isEmpty()) { int importedStylesheets = 0; // Ensure that we have appropriate timeout values so we don't // get DoSed waiting for returns Timeout timeout = DEFAULT_TIMEOUT; try { timeout = Timeout.ofMilliseconds(Long.parseLong(policy.getDirective(Policy.CONNECTION_TIMEOUT))); } catch (NumberFormatException nfe) { // Use default if can't parse policy specified value } RequestConfig requestConfig = RequestConfig.custom() .setConnectTimeout(timeout) .setResponseTimeout(timeout) .setConnectionRequestTimeout(timeout) .build(); HttpClient httpClient = HttpClientBuilder.create() .disableAutomaticRetries() .disableConnectionState() .disableCookieManagement() .setDefaultRequestConfig(requestConfig) .build(); int allowedImports = Policy.DEFAULT_MAX_STYLESHEET_IMPORTS; try { allowedImports = Integer.parseInt(policy.getDirective("maxStyleSheetImports")); } catch (NumberFormatException nfe) { // Use default if can't parse policy specified value } while (!stylesheets.isEmpty()) { URI stylesheetUri = stylesheets.removeFirst(); if (++importedStylesheets > allowedImports) { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_CSS_IMPORT_EXCEEDED, new Object[] { HTMLEntityEncoder.htmlEntityEncode(stylesheetUri.toString()), String.valueOf(allowedImports) })); continue; } // Pulled directly from: // https://github.com/apache/httpcomponents-client/blob/5.1.x/httpclient5/src/test/java/org/apache/hc/client5/http/examples/ClientWithResponseHandler.java // Create a custom response handler to read in the stylesheet final 
HttpClientResponseHandler responseHandler = new HttpClientResponseHandler() { @Override public String handleResponse(final ClassicHttpResponse response) throws IOException { final int status = response.getCode(); if (status >= HttpStatus.SC_SUCCESS && status < HttpStatus.SC_REDIRECTION) { final HttpEntity entity = response.getEntity(); try { return entity != null ? EntityUtils.toString(entity) : null; } catch (final ParseException | org.apache.hc.core5.http.ParseException ex) { throw new ClientProtocolException(ex); } } else { throw new ClientProtocolException("Unexpected response status: " + status); } } }; byte[] stylesheet = null; try { String responseBody = httpClient.execute(new HttpGet(stylesheetUri), responseHandler); // pull down stylesheet, observing size limit. // Note: There is a SpotBugs warning on the next line: "Found reliance on default encoding // in org.owasp.validator.css.CssScanner.parseImportedStylesheets(LinkedList, List, int): // String.getBytes()" but since this method is deprecated, not going to address it as it // will 'go away' eventually. 
stylesheet = responseBody.getBytes(); if (stylesheet != null && stylesheet.length > sizeLimit) { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_CSS_IMPORT_INPUT_SIZE, new Object[] { HTMLEntityEncoder.htmlEntityEncode(stylesheetUri.toString()), String.valueOf(policy.getMaxInputSize()) })); stylesheet = null; } } catch (IOException ioe) { errorMessages.add( ErrorMessageUtil.getMessage( messages, ErrorMessageUtil.ERROR_CSS_IMPORT_FAILURE, new Object[] {HTMLEntityEncoder.htmlEntityEncode(stylesheetUri.toString())})); } if (stylesheet != null) { // decrease the size limit based on the sizeLimit -= stylesheet.length; try { InputSource nextStyleSheet = new InputSource( new InputStreamReader( new ByteArrayInputStream(stylesheet), Charset.forName("UTF8"))); parser.parseStyleSheet(nextStyleSheet); } catch (IOException ioe) { throw new ScanException(ioe); } } } // end while } // end if } // end parseImportedStylesheets() } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/css/CssValidator.java000066400000000000000000000367071451011227400275600ustar00rootroot00000000000000/* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of OWASP nor the names of its contributors may be used to * endorse or promote products derived from this software without specific * prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.css; import java.util.Iterator; import java.util.regex.Pattern; import org.owasp.validator.html.Policy; import org.owasp.validator.html.ScanException; import org.owasp.validator.html.model.AntiSamyPattern; import org.owasp.validator.html.model.Property; import org.owasp.validator.html.util.HTMLEntityEncoder; import org.w3c.css.sac.AttributeCondition; import org.w3c.css.sac.CombinatorCondition; import org.w3c.css.sac.Condition; import org.w3c.css.sac.ConditionalSelector; import org.w3c.css.sac.DescendantSelector; import org.w3c.css.sac.LexicalUnit; import org.w3c.css.sac.NegativeCondition; import org.w3c.css.sac.NegativeSelector; import org.w3c.css.sac.Selector; import org.w3c.css.sac.SiblingSelector; import org.w3c.css.sac.SimpleSelector; /** * Encapsulates all the necessary operations for validating individual elements of a stylesheet * (namely: selectors, conditions, and properties). * * @author Jason Li */ public class CssValidator { private final Policy policy; /** * Constructs a validator for CSS selectors, conditions, and properties based on the given * AntiSamy policy. 
* * @param policy the policy file to use with this validator */ public CssValidator(Policy policy) { this.policy = policy; } /** * Determines whether the given property (both name and value) are valid according to this * validator's policy. * * @param name the name of the property * @param lu the value of the property * @return true if this property name/value is valid; false otherwise */ public boolean isValidProperty(String name, LexicalUnit lu) { boolean isValid = false; Property property = null; if (name != null) { property = policy.getPropertyByName(name.toLowerCase()); } // if we were able to find the property by name, validate the value if (property != null) { // validate all values attached to this property isValid = true; while (lu != null) { String value = lexicalValueToString(lu); if (value == null || !validateValue(property, value)) { isValid = false; break; } lu = lu.getNextLexicalUnit(); } } return isValid; } /** * Determines whether the given selector name is valid according to this validator's policy. 
* * @param selectorName the name of the selector * @param selector the object representation of the selector * @return true if this selector name is valid; false otherwise * @throws ScanException When there is a problem encountered while scanning this selector */ public boolean isValidSelector(String selectorName, Selector selector) throws ScanException { // determine correct behavior switch (selector.getSelectorType()) { case Selector.SAC_ANY_NODE_SELECTOR: case Selector.SAC_ELEMENT_NODE_SELECTOR: case Selector.SAC_PSEUDO_ELEMENT_SELECTOR: case Selector.SAC_ROOT_NODE_SELECTOR: // these selectors are the most base selectors return validateSimpleSelector((SimpleSelector) selector); case Selector.SAC_CHILD_SELECTOR: case Selector.SAC_DESCENDANT_SELECTOR: // these are compound selectors - decompose into simple selectors DescendantSelector descSelector = (DescendantSelector) selector; return isValidSelector(selectorName, descSelector.getSimpleSelector()) && isValidSelector(selectorName, descSelector.getAncestorSelector()); case Selector.SAC_CONDITIONAL_SELECTOR: // this is a compound selector - decompose into simple selectors ConditionalSelector condSelector = (ConditionalSelector) selector; return isValidSelector(selectorName, condSelector.getSimpleSelector()) && isValidCondition(selectorName, condSelector.getCondition()); case Selector.SAC_DIRECT_ADJACENT_SELECTOR: // this is a compound selector - decompose into simple selectors SiblingSelector sibSelector = (SiblingSelector) selector; return isValidSelector(selectorName, sibSelector.getSiblingSelector()) && isValidSelector(selectorName, sibSelector.getSelector()); case Selector.SAC_NEGATIVE_SELECTOR: // this is a compound selector with one simple selector return validateSimpleSelector((NegativeSelector) selector); case Selector.SAC_CDATA_SECTION_NODE_SELECTOR: case Selector.SAC_COMMENT_NODE_SELECTOR: case Selector.SAC_PROCESSING_INSTRUCTION_NODE_SELECTOR: case Selector.SAC_TEXT_NODE_SELECTOR: default: throw new 
UnknownSelectorException(HTMLEntityEncoder.htmlEntityEncode(selector.toString())); } } /** * Validates a basic selector against the policy * * @param selector the object representation of the selector * @return true if this selector name is valid; false otherwise */ private boolean validateSimpleSelector(SimpleSelector selector) { // ensure the name follows the valid pattern and is not blacklisted // by the exclusion pattern. // NOTE: intentionally using non-short-circuited AND operator to // generate all relevant error messages String selectorLowerCase = selector.toString().toLowerCase(); return policy.getCommonRegularExpressions("cssElementSelector").matches(selectorLowerCase) && !policy.getCommonRegularExpressions("cssElementExclusion").matches(selectorLowerCase); } /** * Determines whether the given condition is valid according to this validator's policy. * * @param selectorName the name of the selector that contains this condition * @param condition the object representation of this condition * @return true if this condition is valid; false otherwise * @throws ScanException When there is a problem encountered while scanning this condition */ public boolean isValidCondition(String selectorName, Condition condition) throws ScanException { switch (condition.getConditionType()) { case Condition.SAC_AND_CONDITION: case Condition.SAC_OR_CONDITION: // these are compound condition - decompose into simple conditions CombinatorCondition comboCondition = (CombinatorCondition) condition; return isValidCondition(selectorName, comboCondition.getFirstCondition()) && isValidCondition(selectorName, comboCondition.getSecondCondition()); case Condition.SAC_CLASS_CONDITION: // this is a basic class condition; compare condition against // valid pattern and is not blacklisted by exclusion pattern return validateCondition( (AttributeCondition) condition, policy.getCommonRegularExpressions("cssClassSelector"), policy.getCommonRegularExpressions("cssClassExclusion")); case 
Condition.SAC_ID_CONDITION: // this is a basic ID condition; compare condition against // valid pattern and is not blacklisted by exclusion pattern return validateCondition( (AttributeCondition) condition, policy.getCommonRegularExpressions("cssIDSelector"), policy.getCommonRegularExpressions("cssIDExclusion")); case Condition.SAC_PSEUDO_CLASS_CONDITION: // this is a basic psuedo element condition; compare condition // against valid pattern and is not blacklisted by exclusion pattern return validateCondition( (AttributeCondition) condition, policy.getCommonRegularExpressions("cssPseudoElementSelector"), policy.getCommonRegularExpressions("cssPsuedoElementExclusion")); case Condition.SAC_BEGIN_HYPHEN_ATTRIBUTE_CONDITION: case Condition.SAC_ONE_OF_ATTRIBUTE_CONDITION: case Condition.SAC_ATTRIBUTE_CONDITION: // this is a basic class condition; compare condition against // valid pattern and is not blacklisted by exclusion pattern return validateCondition( (AttributeCondition) condition, policy.getCommonRegularExpressions("cssAttributeSelector"), policy.getCommonRegularExpressions("cssAttributeExclusion")); case Condition.SAC_NEGATIVE_CONDITION: // this is a compound condition; decompose to simple condition return isValidCondition(selectorName, ((NegativeCondition) condition).getCondition()); case Condition.SAC_ONLY_CHILD_CONDITION: case Condition.SAC_ONLY_TYPE_CONDITION: // :only-child and :only-of-type are constants return true; case Condition.SAC_POSITIONAL_CONDITION: case Condition.SAC_CONTENT_CONDITION: case Condition.SAC_LANG_CONDITION: default: throw new UnknownSelectorException(HTMLEntityEncoder.htmlEntityEncode(selectorName)); } } /** * Validates a basic condition against the white list pattern and the blacklist pattern * * @param condition the object representation of the condition * @param pattern the positive pattern of valid conditions * @param exclusionPattern the negative pattern of excluded conditions * @return true if this selector name is valid; false 
otherwise */ private boolean validateCondition( AttributeCondition condition, AntiSamyPattern pattern, AntiSamyPattern exclusionPattern) { // check that the name of the condition matches valid pattern and does // not match exclusion pattern // NOTE: intentionally using non-short-circuited AND operator to // generate all relevant error messages String otherLower = condition.toString().toLowerCase(); return pattern.matches(otherLower) && !exclusionPattern.matches(otherLower); } /** * Determines whether the given property value is valid according to this validator's policy. * * @param property the object representation of the property and its associated policy * @param value the string representation of the value * @return true if the property is valid; false otherwise */ private boolean validateValue(Property property, String value) { boolean isValid = false; // normalize the value to lowercase value = value.toLowerCase(); // check if the value matches any of the allowed literal values Iterator allowedValues = property.getAllowedValues().iterator(); while (allowedValues.hasNext() && !isValid) { String allowedValue = (String) allowedValues.next(); if (allowedValue != null && allowedValue.equals(value)) { isValid = true; } } // check if the value matches any of the allowed regular expressions Iterator allowedRegexps = property.getAllowedRegExp().iterator(); while (allowedRegexps.hasNext() && !isValid) { Pattern pattern = (Pattern) allowedRegexps.next(); if (pattern != null && pattern.matcher(value).matches()) { isValid = true; } } // check if the value matches any of the allowed shorthands Iterator shorthandRefs = property.getShorthandRefs().iterator(); while (shorthandRefs.hasNext() && !isValid) { String shorthandRef = (String) shorthandRefs.next(); Property shorthand = policy.getPropertyByName(shorthandRef); if (shorthand != null) { isValid = validateValue(shorthand, value); } } return isValid; } /** * Converts the given lexical unit to a String representation. 
This method does not * perform any validation - it is meant to be used in conjunction with the validator/logging * methods. * * @param lu the lexical unit to convert * @return a String representation of the given lexical unit */ public String lexicalValueToString(LexicalUnit lu) { switch (lu.getLexicalUnitType()) { case LexicalUnit.SAC_PERCENTAGE: case LexicalUnit.SAC_DIMENSION: case LexicalUnit.SAC_EM: case LexicalUnit.SAC_EX: case LexicalUnit.SAC_PIXEL: case LexicalUnit.SAC_INCH: case LexicalUnit.SAC_CENTIMETER: case LexicalUnit.SAC_MILLIMETER: case LexicalUnit.SAC_POINT: case LexicalUnit.SAC_PICA: case LexicalUnit.SAC_DEGREE: case LexicalUnit.SAC_GRADIAN: case LexicalUnit.SAC_RADIAN: case LexicalUnit.SAC_MILLISECOND: case LexicalUnit.SAC_SECOND: case LexicalUnit.SAC_HERTZ: case LexicalUnit.SAC_KILOHERTZ: // these are all measurements return lu.getFloatValue() + lu.getDimensionUnitText(); case LexicalUnit.SAC_INTEGER: // just a number return String.valueOf(lu.getIntegerValue()); case LexicalUnit.SAC_REAL: // just a number return String.valueOf(lu.getFloatValue()); case LexicalUnit.SAC_STRING_VALUE: case LexicalUnit.SAC_IDENT: // just a string/identifier String stringValue = lu.getStringValue(); if (stringValue.indexOf(" ") != -1) stringValue = "'" + stringValue + "'"; return stringValue; case LexicalUnit.SAC_URI: // this is a URL return "url(" + lu.getStringValue() + ")"; case LexicalUnit.SAC_RGBCOLOR: // this is a rgb encoded color StringBuffer sb = new StringBuffer("rgb("); LexicalUnit param = lu.getParameters(); sb.append(param.getIntegerValue()); // R value sb.append(','); param = param.getNextLexicalUnit(); // comma param = param.getNextLexicalUnit(); // G value sb.append(param.getIntegerValue()); sb.append(','); param = param.getNextLexicalUnit(); // comma param = param.getNextLexicalUnit(); // B value sb.append(param.getIntegerValue()); sb.append(')'); return sb.toString(); case LexicalUnit.SAC_INHERIT: // constant return "inherit"; case 
LexicalUnit.SAC_OPERATOR_COMMA: return ","; case LexicalUnit.SAC_ATTR: case LexicalUnit.SAC_COUNTER_FUNCTION: case LexicalUnit.SAC_COUNTERS_FUNCTION: case LexicalUnit.SAC_FUNCTION: case LexicalUnit.SAC_RECT_FUNCTION: case LexicalUnit.SAC_SUB_EXPRESSION: case LexicalUnit.SAC_UNICODERANGE: default: // these are properties that shouldn't be necessary for most run // of the mill HTML/CSS return null; } } } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/css/UnknownSelectorException.java000066400000000000000000000040721451011227400321670ustar00rootroot00000000000000/* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.css; /** * This exception gets thrown when there is an unrecognized Selector type parsing the tainted CSS. * * @author Jason Li */ import org.owasp.validator.html.ScanException; public class UnknownSelectorException extends ScanException { private final String selectorName; public UnknownSelectorException(String selectorName) { super("Unknown selector " + selectorName); this.selectorName = selectorName; } /** @return the selectorName */ public String getSelectorName() { return selectorName; } } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/css/package.html000066400000000000000000000011311451011227400265570ustar00rootroot00000000000000 This package contains implementation classes to handle the sanitization of Cascading Style Sheets (CSS) and should not be directly used by clients. Whenever we do a Java 9 (or later release) of AntiSamy we plan to package this into a Java Module so it is not publicly accessible. As such, AntiSamy users should not invoke anything in this package directly. nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/000077500000000000000000000000001451011227400244565ustar00rootroot00000000000000nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/AntiSamy.java000066400000000000000000000212141451011227400270460ustar00rootroot00000000000000/* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without modification, are permitted * provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this list of conditions * and the following disclaimer. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the documentation and/or other * materials provided with the distribution. Neither the name of OWASP nor the names of its * contributors may be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.html; import java.io.File; import java.io.Reader; import java.io.Writer; import org.owasp.validator.html.scan.AntiSamyDOMScanner; import org.owasp.validator.html.scan.AntiSamySAXScanner; /** * This and the {@code CleanResults} class are generally the only classes which the outside world * should be calling. The {@code scan()} method holds the meat and potatoes of AntiSamy. The file * contains a number of ways for {@code scan()}'ing, depending on the accessibility of the policy * file. 
However, it should be noted that the SAX scan type, which uses a SAX-based parser should be
 * the preferred way of using AntiSamy as it is much more efficient, and generally faster, than the
 * DOM-based parser.
 *
 * @author Arshan Dabirsiaghi
 */
public class AntiSamy {

  /** Scan type value that routes a scan to the DOM-based scanner. */
  public static final int DOM = 0;

  /** Scan type value that routes a scan to the SAX-based scanner. */
  public static final int SAX = 1;

  /** Policy used by the overloads that do not take one explicitly; null until configured. */
  private Policy policy;

  /** Creates an instance without a pre-configured policy. */
  public AntiSamy() {}

  /**
   * Creates an instance that uses the given policy for the overloads that do not take one.
   *
   * @param policy The policy to enforce by default.
   */
  public AntiSamy(Policy policy) {
    this.policy = policy;
  }

  /**
   * Primary entry point for most users: sanitizes the supplied HTML with the policy this instance
   * was configured with, using the SAX parser. The {@code scan()} overloads are the only methods
   * of this class that outside callers are expected to use.
   *
   * @param taintedHTML Untrusted HTML which may contain malicious code.
   * @return A {@code CleanResults} object which contains information about the scan (including
   *     the results).
   * @throws ScanException When there is a problem encountered while scanning the HTML input.
   * @throws PolicyException When there is a problem validating or parsing the policy file.
   */
  public CleanResults scan(String taintedHTML) throws ScanException, PolicyException {
    return scan(taintedHTML, policy, SAX);
  }

  /**
   * Sanitizes the supplied HTML with the policy this instance was configured with, using the
   * parser selected by {@code scanType}.
   *
   * @param taintedHTML Untrusted HTML which may contain malicious code.
   * @param scanType The type of scan (DOM or SAX).
   * @return A {@code CleanResults} object which contains information about the scan (including
   *     the results).
   * @throws ScanException When there is a problem encountered while scanning the HTML input.
   * @throws PolicyException When there is a problem validating or parsing the policy file.
   */
  public CleanResults scan(String taintedHTML, int scanType)
      throws ScanException, PolicyException {
    return scan(taintedHTML, policy, scanType);
  }

  /**
   * Sanitizes the supplied HTML with the supplied policy, using the DOM parser.
   *
   * @param taintedHTML Untrusted HTML which may contain malicious code.
   * @param policy The custom policy to enforce.
   * @return A {@code CleanResults} object which contains information about the scan (including
   *     the results).
   * @throws ScanException When there is a problem encountered while scanning the HTML input.
   * @throws PolicyException When there is a problem validating or parsing the policy file.
   */
  public CleanResults scan(String taintedHTML, Policy policy)
      throws ScanException, PolicyException {
    return scan(taintedHTML, policy, DOM);
  }

  /**
   * Sanitizes the supplied HTML with the supplied policy, using the parser selected by {@code
   * scanType}. Any scan type other than {@link #DOM} is handled by the SAX scanner.
   *
   * @param taintedHTML Untrusted HTML which may contain malicious code.
   * @param policy The custom policy to enforce.
   * @param scanType The type of scan (DOM or SAX).
   * @return A {@code CleanResults} object which contains information about the scan (including
   *     the results).
   * @throws ScanException When there is a problem encountered while scanning the HTML input.
   * @throws PolicyException When there is a problem validating or parsing the policy file, or
   *     when {@code policy} is null.
   */
  public CleanResults scan(String taintedHTML, Policy policy, int scanType)
      throws ScanException, PolicyException {
    // guard: nothing can be scanned without a policy
    if (policy == null) {
      throw new PolicyException("No policy loaded");
    }

    if (scanType != DOM) {
      return new AntiSamySAXScanner(policy).scan(taintedHTML);
    }
    return new AntiSamyDOMScanner(policy).scan(taintedHTML);
  }

  /**
   * Stream-oriented variant for callers that have a Reader/Writer pair rather than Strings. This
   * uses the SAX parser. It is useful when the input being processed is expected to be very large
   * and we don't validate, but rather simply encode as bytes are consumed from the stream.
   *
   * @param reader Reader that produces the input, possibly a little at a time
   * @param writer Writer that receives the cleaned output, possibly a little at a time
   * @param policy Policy that directs the scan
   * @return CleanResults where the cleanHtml is null. If caller wants the clean HTML, it must
   *     capture the writer's contents. When using Streams, caller generally doesn't want to create
   *     a single string containing clean HTML.
   * @throws ScanException When there is a problem encountered while scanning the HTML input.
   */
  public CleanResults scan(Reader reader, Writer writer, Policy policy) throws ScanException {
    AntiSamySAXScanner saxScanner = new AntiSamySAXScanner(policy);
    return saxScanner.scan(reader, writer);
  }

  /**
   * Sanitizes the supplied HTML with the policy loaded from the named file, using the DOM parser.
   *
   * @param taintedHTML Untrusted HTML which may contain malicious code.
   * @param policyFilename The file name of the custom policy to enforce.
   * @return A {@code CleanResults} object which contains information about the scan (including
   *     the results).
   * @throws ScanException When there is a problem encountered while scanning the HTML input.
   * @throws PolicyException When there is a problem validating or parsing the policy file.
   */
  public CleanResults scan(String taintedHTML, String policyFilename)
      throws ScanException, PolicyException {
    Policy loadedPolicy = Policy.getInstance(policyFilename);
    return scan(taintedHTML, loadedPolicy);
  }

  /**
   * Sanitizes the supplied HTML with the policy loaded from the given file, using the DOM parser.
   *
   * @param taintedHTML Untrusted HTML which may contain malicious code.
   * @param policyFile The File object of the custom policy to enforce.
   * @return A {@code CleanResults} object which contains information about the scan (including
   *     the results).
   * @throws ScanException When there is a problem encountered while scanning the HTML input.
   * @throws PolicyException When there is a problem validating or parsing the policy file.
   */
  public CleanResults scan(String taintedHTML, File policyFile)
      throws ScanException, PolicyException {
    Policy loadedPolicy = Policy.getInstance(policyFile);
    return scan(taintedHTML, loadedPolicy);
  }
}
nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/CleanResults.java000066400000000000000000000200001451011227400277150ustar00rootroot00000000000000/*
 * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted
 * provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this list of conditions
 * and the following disclaimer. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the documentation and/or other
 * materials provided with the distribution. Neither the name of OWASP nor the names of its
 * contributors may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.html; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.concurrent.Callable; import org.w3c.dom.DocumentFragment; /** * This class contains the results of a scan. It primarily provides access to the clean sanitized * HTML, per the AntiSamy sanitization policy applied. It also provides access to some utility * information, like possible error messages and error message counts. * *

WARNING: The ONLY output from the class you can completely rely on is the CleanResults output. * As stated in the project README file, neither the {@code * getErrorMessages()} nor the {@code getNumberOfErrors()} methods subtly answer the question "is * this safe input?" in the affirmative if it returns an empty list. You must always use the * sanitized 'clean' input and there is no way to be sure the input passed in had no attacks. * *

The serialization and deserialization process that is critical to the effectiveness of the * sanitizer is purposefully lossy and will filter out attacks via a number of attack vectors. * Unfortunately, one of the tradeoffs of this strategy is that AntiSamy doesn't always know in * retrospect that an attack was seen. Thus, the getErrorMessages() API is there to help users * understand whether their well-intentioned input meets the requirements of the system, not help a * developer detect if an attack was present. * *

The list of error messages (available via {@code getErrorMessages()}) will let the user know * what, if any HTML errors existed, and what, when possible, any security or validation-related * errors that were detected, and what was done about them. * *

WARNING: As just stated, the absence of error messages does NOT mean there were * no attacks in the input that were sanitized out. You CANNOT rely on the {@code * getErrorMessages()} or {@code getNumberOfErrors()} methods to tell you if the input was * dangerous. You MUST use the output of {@code getCleanHTML()} to ensure your output is safe. * * @author Arshan Dabirsiaghi */ public class CleanResults { private List errorMessages; private Callable cleanHTML; private long startOfScan; // Time the scan started in milliseconds since epoch. private long elapsedScan; // Elapsed time for the scan, in milliseconds /* * A DOM object version of the clean HTML String. May be null even if clean HTML is set. */ private DocumentFragment cleanXMLDocumentFragment; /* * Default constructor. Can be extended. */ public CleanResults() { this.errorMessages = new ArrayList(); } /** * Create a clean set of results. * * @param startOfScan - The time when the scan started. * @param cleanHTML - The resulting clean HTML produced per the AntiSamy policy. * @param XMLDocumentFragment - The XML Document fragment version of the clean HTML produced * during the sanitization process. * @param errorMessages - Messages describing any errors that occurred during sanitization. */ public CleanResults( long startOfScan, final String cleanHTML, DocumentFragment XMLDocumentFragment, List errorMessages) { this( startOfScan, new Callable() { public String call() throws Exception { return cleanHTML; } }, XMLDocumentFragment, errorMessages); } /** * Create a clean set of results. * * @param startOfScan - The time when the scan started. * @param cleanHTML - The resulting clean HTML produced per the AntiSamy policy. * @param XMLDocumentFragment - The XML Document fragment version of the clean HTML produced * during the sanitization process. * @param errorMessages - Messages describing any errors that occurred during sanitization. 
*/ public CleanResults( long startOfScan, Callable cleanHTML, DocumentFragment XMLDocumentFragment, List errorMessages) { this.startOfScan = startOfScan; this.elapsedScan = System.currentTimeMillis() - startOfScan; this.cleanHTML = cleanHTML; this.cleanXMLDocumentFragment = XMLDocumentFragment; this.errorMessages = Collections.unmodifiableList(errorMessages); } /** * Return the filtered HTML as a String. This output is the ONLY output you can trust to be safe. * The absence of error messages does NOT indicate the input was safe. * * @return A String object which contains the serialized, safe HTML. */ public String getCleanHTML() { try { return cleanHTML.call(); } catch (Exception e) { throw new RuntimeException(e); } } /** * Return the DOM version of the clean HTML. * * @return The XML Document fragment version of the clean HTML produced during the sanitization * process. This may be null, even if the clean HTML String is not null. */ public DocumentFragment getCleanXMLDocumentFragment() { return cleanXMLDocumentFragment; } /** * Return a list of error messages -- but an empty list returned does not mean there was no attack * present, due to the serialization and deserialization process automatically cleaning up some * attacks. Only the output of the {@code getCleanHTML()} should be considered safe. See the * project README file and {@code CleanResults} class documentation for further discussion. * * @return An ArrayList object which contains the error messages, if any, after a scan. * @see Project README * @see #getCleanHTML() */ public List getErrorMessages() { return errorMessages; } /** * Return the number of errors identified, if any, during filtering. Note that 0 errors does NOT * mean the input was safe. Only the output of {@code getCleanHTML()} can be considered safe. * * @return The number of errors encountered during filtering. 
* @see #getCleanHTML() */ public int getNumberOfErrors() { return errorMessages.size(); } /** * Return the time elapsed during the scan. * * @return A double indicating the amount of time elapsed between the beginning and end of the * scan in seconds. */ public double getScanTime() { return elapsedScan / 1000D; } /** * Get the time the scan started. * * @return time that scan started in milliseconds since epoch. */ public long getStartOfScan() { return startOfScan; } } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/InternalPolicy.java000066400000000000000000000137551451011227400302700ustar00rootroot00000000000000package org.owasp.validator.html; import java.util.Map; import org.owasp.validator.html.model.Property; import org.owasp.validator.html.model.Tag; /** * Contains a bunch of optimized lookups over the regular Policy Class. For internal use only. * *

Not part of any public API and may explode or self-destruct at any given moment, preferably * both. * * @author Kristian Rosenvold */ public class InternalPolicy extends Policy { private final int maxInputSize; private final boolean isNofollowAnchors; private final boolean isNoopenerAndNoreferrerAnchors; private final boolean isValidateParamAsEmbed; private final boolean formatOutput; private final boolean preserveSpace; private final boolean omitXmlDeclaration; private final boolean omitDoctypeDeclaration; private final boolean entityEncodeIntlCharacters; private final Tag embedTag; private final Tag styleTag; private final String onUnknownTag; private final boolean preserveComments; private final boolean embedStyleSheets; private final boolean isEncodeUnknownTag; private final boolean allowDynamicAttributes; protected InternalPolicy(ParseContext parseContext) { super(parseContext); this.maxInputSize = determineMaxInputSize(); this.isNofollowAnchors = isTrue(Policy.ANCHORS_NOFOLLOW); this.isNoopenerAndNoreferrerAnchors = isTrue(Policy.ANCHORS_NOOPENER_NOREFERRER); this.isValidateParamAsEmbed = isTrue(Policy.VALIDATE_PARAM_AS_EMBED); this.formatOutput = isTrue(Policy.FORMAT_OUTPUT); this.preserveSpace = isTrue(Policy.PRESERVE_SPACE); this.omitXmlDeclaration = isTrue(Policy.OMIT_XML_DECLARATION); this.omitDoctypeDeclaration = isTrue(Policy.OMIT_DOCTYPE_DECLARATION); this.entityEncodeIntlCharacters = isTrue(Policy.ENTITY_ENCODE_INTL_CHARS); this.embedTag = getTagByLowercaseName("embed"); this.onUnknownTag = getDirective("onUnknownTag"); this.isEncodeUnknownTag = Policy.ACTION_ENCODE.equals(onUnknownTag); this.preserveComments = isTrue(Policy.PRESERVE_COMMENTS); this.styleTag = getTagByLowercaseName("style"); this.embedStyleSheets = isTrue(Policy.EMBED_STYLESHEETS); this.allowDynamicAttributes = isTrue(Policy.ALLOW_DYNAMIC_ATTRIBUTES); if (!isNoopenerAndNoreferrerAnchors) { logger.warn( "The directive \"" + Policy.ANCHORS_NOOPENER_NOREFERRER + "\" is enabled by 
default, but disabled in this policy. It is recommended to leave it enabled to prevent reverse tabnabbing attacks."); } } protected InternalPolicy( Policy old, Map directives, Map tagRules, Map cssRules) { super(old, directives, tagRules, cssRules); this.maxInputSize = determineMaxInputSize(); this.isNofollowAnchors = isTrue(Policy.ANCHORS_NOFOLLOW); this.isNoopenerAndNoreferrerAnchors = isTrue(Policy.ANCHORS_NOOPENER_NOREFERRER); this.isValidateParamAsEmbed = isTrue(Policy.VALIDATE_PARAM_AS_EMBED); this.formatOutput = isTrue(Policy.FORMAT_OUTPUT); this.preserveSpace = isTrue(Policy.PRESERVE_SPACE); this.omitXmlDeclaration = isTrue(Policy.OMIT_XML_DECLARATION); this.omitDoctypeDeclaration = isTrue(Policy.OMIT_DOCTYPE_DECLARATION); this.entityEncodeIntlCharacters = isTrue(Policy.ENTITY_ENCODE_INTL_CHARS); this.embedTag = getTagByLowercaseName("embed"); this.onUnknownTag = getDirective("onUnknownTag"); this.isEncodeUnknownTag = Policy.ACTION_ENCODE.equals(onUnknownTag); this.preserveComments = isTrue(Policy.PRESERVE_COMMENTS); this.styleTag = getTagByLowercaseName("style"); this.embedStyleSheets = isTrue(Policy.EMBED_STYLESHEETS); this.allowDynamicAttributes = isTrue(Policy.ALLOW_DYNAMIC_ATTRIBUTES); if (!isNoopenerAndNoreferrerAnchors) { logger.warn( "The directive \"" + Policy.ANCHORS_NOOPENER_NOREFERRER + "\" is enabled by default, but disabled in this policy. It is recommended to leave it enabled to prevent reverse tabnabbing attacks."); } } public Tag getEmbedTag() { return embedTag; } public Tag getStyleTag() { return styleTag; } /** * Returns whether remote CSS can be imported. NOTE: This is dangerous and should not be enabled. * * @return True if remote CSS is allowed, false otherwise. * @deprecated Remote styles import feature to be removed and along with this error message. 
*/ @Deprecated public boolean isEmbedStyleSheets() { return embedStyleSheets; } public boolean isPreserveComments() { return preserveComments; } public int getMaxInputSize() { return maxInputSize; } public boolean isEntityEncodeIntlCharacters() { return entityEncodeIntlCharacters; } public boolean isNofollowAnchors() { return isNofollowAnchors; } public boolean isNoopenerAndNoreferrerAnchors() { return isNoopenerAndNoreferrerAnchors; } public boolean isValidateParamAsEmbed() { return isValidateParamAsEmbed; } public boolean isFormatOutput() { return formatOutput; } public boolean isPreserveSpace() { return preserveSpace; } public boolean isOmitXmlDeclaration() { return omitXmlDeclaration; } public boolean isOmitDoctypeDeclaration() { return omitDoctypeDeclaration; } private boolean isTrue(String anchorsNofollow) { return "true".equals(getDirective(anchorsNofollow)); } public String getOnUnknownTag() { return onUnknownTag; } public boolean isEncodeUnknownTag() { return isEncodeUnknownTag; } public boolean isAllowDynamicAttributes() { return allowDynamicAttributes; } /** * Returns the maximum input size. If this value is not specified by the policy, the * DEFAULT_MAX_INPUT_SIZE is used. * * @return the maximum input size. */ public int determineMaxInputSize() { int maxInputSize = Policy.DEFAULT_MAX_INPUT_SIZE; try { maxInputSize = Integer.parseInt(getDirective("maxInputSize")); } catch (NumberFormatException ignore) { } return maxInputSize; } } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/Policy.java000066400000000000000000001260721451011227400265700ustar00rootroot00000000000000/* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li, Kristian Rosenvold * * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ package org.owasp.validator.html; import static org.owasp.validator.html.util.XMLUtil.getAttributeValue; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.net.MalformedURLException; import java.net.URI; import java.net.URL; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.Callable; import java.util.regex.Pattern; import javax.xml.XMLConstants; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.Source; import javax.xml.transform.stream.StreamSource; import javax.xml.validation.Schema; import javax.xml.validation.SchemaFactory; import org.owasp.validator.html.model.AntiSamyPattern; import org.owasp.validator.html.model.Attribute; import org.owasp.validator.html.model.Property; import org.owasp.validator.html.model.Tag; import org.owasp.validator.html.scan.Constants; import org.owasp.validator.html.util.URIUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; /** * This class holds the model for our policy engine. * *

## Schema validation behavior change starting with AntiSamy 1.6.0 ## * *

Prior to v1.6.0 AntiSamy was not actually enforcing its defined XSD. For all of v1.6.x, by * default AntiSamy enforced the schema, and wouldn't continue if the AntiSamy policy was invalid. * However, we recognized that it might not be possible for developers to fix their AntiSamy * policies right away so we provided two ways to (temporarily!) disable schema validation. Via a * direct method call and via a System property. * *

## Starting with AntiSamy 1.7.0, schema validation is Mandatory. * *

Logging: The logging introduced in 1.6+ uses slf4j. But AntiSamy doesn't actually include an * slf4j implementation library. AntiSamy users must import and properly configure an slf4j logging * library if they want to see the very few log messages generated by AntiSamy. * * @author Arshan Dabirsiaghi */ public class Policy { protected static final Logger logger = LoggerFactory.getLogger(Policy.class); public static final Pattern ANYTHING_REGEXP = Pattern.compile(".*", Pattern.DOTALL); private static final String POLICY_SCHEMA_URI = "antisamy.xsd"; protected static final String DEFAULT_POLICY_URI = "antisamy.xml"; private static final String DEFAULT_ONINVALID = "removeAttribute"; public static final int DEFAULT_MAX_INPUT_SIZE = 100000; public static final String OMIT_XML_DECLARATION = "omitXmlDeclaration"; public static final String OMIT_DOCTYPE_DECLARATION = "omitDoctypeDeclaration"; public static final String FORMAT_OUTPUT = "formatOutput"; public static final String ANCHORS_NOFOLLOW = "nofollowAnchors"; public static final String ANCHORS_NOOPENER_NOREFERRER = "noopenerAndNoreferrerAnchors"; public static final String VALIDATE_PARAM_AS_EMBED = "validateParamAsEmbed"; public static final String PRESERVE_SPACE = "preserveSpace"; public static final String PRESERVE_COMMENTS = "preserveComments"; public static final String ENTITY_ENCODE_INTL_CHARS = "entityEncodeIntlChars"; public static final String ALLOW_DYNAMIC_ATTRIBUTES = "allowDynamicAttributes"; public static final String MAX_INPUT_SIZE = "maxInputSize"; /** @deprecated Remote styles import feature to be removed and along with this error message. */ @Deprecated public static final int DEFAULT_MAX_STYLESHEET_IMPORTS = 1; /** @deprecated Remote styles import feature to be removed and along with this error message. */ @Deprecated public static final String EMBED_STYLESHEETS = "embedStyleSheets"; /** @deprecated Remote styles import feature to be removed and along with this error message. 
*/ @Deprecated public static final String CONNECTION_TIMEOUT = "connectionTimeout"; /** @deprecated Remote styles import feature to be removed and along with this error message. */ @Deprecated public static final String MAX_STYLESHEET_IMPORTS = "maxStyleSheetImports"; public static final String EXTERNAL_GENERAL_ENTITIES = "http://xml.org/sax/features/external-general-entities"; public static final String EXTERNAL_PARAM_ENTITIES = "http://xml.org/sax/features/external-parameter-entities"; public static final String DISALLOW_DOCTYPE_DECL = "http://apache.org/xml/features/disallow-doctype-decl"; public static final String LOAD_EXTERNAL_DTD = "http://apache.org/xml/features/nonvalidating/load-external-dtd"; public static final String ACTION_VALIDATE = "validate"; public static final String ACTION_FILTER = "filter"; public static final String ACTION_TRUNCATE = "truncate"; public static final String ACTION_ENCODE = "encode"; private final Map commonRegularExpressions; protected final Map tagRules; protected final Map cssRules; protected final Map directives; private final Map globalAttributes; private final Map dynamicAttributes; private final TagMatcher allowedEmptyTagsMatcher; private final TagMatcher requiresClosingTagsMatcher; /** XML Schema for policy validation */ private static volatile Schema schema = null; /** * Get the Tag specified by the provided tag name. * * @param tagName The name of the Tag to return. * @return The requested Tag, or null if it doesn't exist. 
*/
  public Tag getTagByLowercaseName(String tagName) {
    // The lookup is done verbatim: no case folding is applied to tagName here.
    return tagRules.get(tagName);
  }

  /**
   * Mutable accumulator that is filled while one or more policy documents are parsed and is then
   * handed to the {@link Policy} constructor.
   */
  protected static class ParseContext {
    Map<String, AntiSamyPattern> commonRegularExpressions = new HashMap<>();
    Map<String, Attribute> commonAttributes = new HashMap<>();
    Map<String, Tag> tagRules = new HashMap<>();
    Map<String, Property> cssRules = new HashMap<>();
    Map<String, String> directives = new HashMap<>();
    Map<String, Attribute> globalAttributes = new HashMap<>();
    Map<String, Attribute> dynamicAttributes = new HashMap<>();

    List<String> allowedEmptyTags = new ArrayList<>();
    List<String> requireClosingTags = new ArrayList<>();

    /**
     * Clears the settings for which the most recently parsed policy file replaces, rather than
     * extends, whatever an earlier (included) file configured.
     */
    public void resetParamsWhereLastConfigWins() {
      allowedEmptyTags.clear();
      requireClosingTags.clear();
    }
  }

  /**
   * Retrieves a CSS Property from the Policy.
   *
   * @param propertyName The name of the CSS Property to look up. It is lower-cased before the
   *     lookup.
   * @return The CSS Property associated with the name specified, or null if none is found.
   */
  public Property getPropertyByName(String propertyName) {
    return cssRules.get(propertyName.toLowerCase());
  }

  /**
   * Construct a Policy using the default policy file location ("antisamy.xml").
   *
   * @return A populated Policy object based on the XML policy file located in the default location.
   * @throws PolicyException If the file is not found or there is a problem parsing the file.
   */
  public static Policy getInstance() throws PolicyException {
    URL defaultPolicy = Policy.class.getClassLoader().getResource(DEFAULT_POLICY_URI);
    if (defaultPolicy == null) {
      // getResource() signals "not found" with null; report it as the documented PolicyException
      // instead of letting a NullPointerException escape from the URL-based factory method.
      throw new PolicyException(
          "Default policy file '" + DEFAULT_POLICY_URI + "' was not found on the classpath");
    }
    return getInstance(defaultPolicy);
  }

  /**
   * Construct a Policy based on the file whose name is passed in.
   *
   * @param filename The path to the XML policy file.
   * @return A populated Policy object based on the XML policy file located in the location passed
   *     in.
   * @throws PolicyException If the file is not found or there is a problem parsing the file.
   */
  public static Policy getInstance(String filename) throws PolicyException {
    return getInstance(new File(filename));
  }

  /**
   * Construct a Policy from the InputStream object passed in.
   *
   * @param inputStream An InputStream which contains the XML policy information.
* @return A populated Policy object based on the XML policy file pointed to by the inputStream * parameter. * @throws PolicyException If there is a problem parsing the input stream. */ public static Policy getInstance(InputStream inputStream) throws PolicyException { logger.info("Attempting to load AntiSamy policy from an input stream."); return new InternalPolicy(getSimpleParseContext(getTopLevelElement(inputStream))); } /** * Construct a Policy from the File object passed in. * * @param file A File object which contains the XML policy information. * @return A populated Policy object based on the XML policy file pointed to by the File * parameter. * @throws PolicyException If the file is not found or there is a problem parsing the file. */ public static Policy getInstance(File file) throws PolicyException { try { URI uri = file.toURI(); return getInstance(uri.toURL()); } catch (IOException e) { throw new PolicyException(e); } } /** * Construct a Policy from the target of the URL passed in.
*
* NOTE: This is the only factory method that will work with <include> tags in AntiSamy * policy files.
*
* For security reasons, the provided URL must point to a local file. Currently only 'file:' and * 'jar:' URL prefixes are allowed. If you want to use a different URL format, and are confident * that the URL points to a safe source, you can open the target of the URL with URL.openStream(), * and use the getInstance(InputStream) constructor instead. For example, Spring has classpath: * and Wildfly/Jboss supports vfs: for accessing local files. Just be aware that this alternate * constructor doesn't support the use of <include> tags, per the NOTE above. * * @param url A URL object which contains the XML policy information. * @return A populated Policy object based on the XML policy file pointed to by the File * parameter. * @throws PolicyException If the file is not found or there is a problem parsing the file. */ public static Policy getInstance(URL url) throws PolicyException { logger.info("Attempting to load AntiSamy policy from URL: " + url.toString()); return new InternalPolicy(getParseContext(getTopLevelElement(url), url)); } protected Policy(ParseContext parseContext) { this.allowedEmptyTagsMatcher = new TagMatcher(parseContext.allowedEmptyTags); this.requiresClosingTagsMatcher = new TagMatcher(parseContext.requireClosingTags); this.commonRegularExpressions = Collections.unmodifiableMap(parseContext.commonRegularExpressions); this.tagRules = Collections.unmodifiableMap(parseContext.tagRules); this.cssRules = Collections.unmodifiableMap(parseContext.cssRules); this.directives = Collections.unmodifiableMap(parseContext.directives); this.globalAttributes = Collections.unmodifiableMap(parseContext.globalAttributes); this.dynamicAttributes = Collections.unmodifiableMap(parseContext.dynamicAttributes); } protected Policy( Policy old, Map directives, Map tagRules, Map cssRules) { this.allowedEmptyTagsMatcher = old.allowedEmptyTagsMatcher; this.requiresClosingTagsMatcher = old.requiresClosingTagsMatcher; this.commonRegularExpressions = old.commonRegularExpressions; 
this.tagRules = tagRules; this.cssRules = cssRules; this.directives = directives; this.globalAttributes = old.globalAttributes; this.dynamicAttributes = old.dynamicAttributes; } protected static ParseContext getSimpleParseContext(Element topLevelElement) throws PolicyException { ParseContext parseContext = new ParseContext(); if (getByTagName(topLevelElement, "include").iterator().hasNext()) { throw new IllegalArgumentException( "A policy file loaded with an InputStream cannot contain include references"); } parsePolicy(topLevelElement, parseContext); return parseContext; } protected static ParseContext getParseContext(Element topLevelElement, URL baseUrl) throws PolicyException { ParseContext parseContext = new ParseContext(); /** * Are there any included policies? These are parsed here first so that rules in _this_ policy * file will override included rules. * *

NOTE that by this being here we only support one level of includes. To support recursion, * move this into the parsePolicy method. */ for (Element include : getByTagName(topLevelElement, "include")) { String href = getAttributeValue(include, "href"); Element includedPolicy = getPolicy(href, baseUrl); parsePolicy(includedPolicy, parseContext); } parsePolicy(topLevelElement, parseContext); return parseContext; } protected static Element getTopLevelElement(final URL baseUrl) throws PolicyException { final InputSource source = getSourceFromUrl(baseUrl); return getTopLevelElement( source, new Callable() { @Override public InputSource call() throws PolicyException { return getSourceFromUrl(baseUrl); } }); } @SuppressFBWarnings( value = "SECURITY", justification = "Opening a stream to the provided URL is not " + "a vulnerability because it points to a local JAR file.") protected static InputSource getSourceFromUrl(URL baseUrl) throws PolicyException { try { InputSource source = resolveEntity(baseUrl.toExternalForm(), baseUrl); if (source == null) { source = new InputSource(baseUrl.toExternalForm()); source.setByteStream(baseUrl.openStream()); } else { source.setSystemId(baseUrl.toExternalForm()); } return source; } catch (SAXException | IOException e) { // SAXException can't actually happen. 
See JavaDoc for resolveEntity(String, URL) throw new PolicyException(e); } } private static Element getTopLevelElement(InputStream is) throws PolicyException { final InputSource source = new InputSource(toByteArrayStream(is)); return getTopLevelElement( source, new Callable() { @Override public InputSource call() throws IOException { source.getByteStream().reset(); return source; } }); } protected static Element getTopLevelElement( InputSource source, Callable getResetSource) throws PolicyException { // Track whether an exception was ever thrown while processing policy file try { return getDocumentElementFromSource(source); } catch (SAXException | ParserConfigurationException | IOException e) { throw new PolicyException(e); } } /* * This method takes an arbitrary input stream, copies its contents into a byte[], then returns it * in a ByteArrayInputStream, closing the provided InputStream in the process. It's purpose is to * ensure that the InputStream we are using can be reset to the beginning, as not all InputStream's properly * allow this. We use this for AntiSamy XML policy files, which we never expect to get that large * (e.g., a few Kb at most). */ private static InputStream toByteArrayStream(InputStream in) throws PolicyException { byte[] byteArray; try (Reader reader = new InputStreamReader(in, Charset.forName("UTF8"))) { char[] charArray = new char[8 * 1024]; StringBuilder builder = new StringBuilder(); int numCharsRead; while ((numCharsRead = reader.read(charArray, 0, charArray.length)) != -1) { builder.append(charArray, 0, numCharsRead); } byteArray = builder.toString().getBytes(Charset.forName("UTF8")); } catch (IOException ioe) { throw new PolicyException(ioe); } return new ByteArrayInputStream(byteArray); } private static Element getDocumentElementFromSource(InputSource source) throws ParserConfigurationException, SAXException, IOException { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); /** Disable external entities, etc. 
*/ dbf.setFeature(EXTERNAL_GENERAL_ENTITIES, false); dbf.setFeature(EXTERNAL_PARAM_ENTITIES, false); dbf.setFeature(DISALLOW_DOCTYPE_DECL, true); dbf.setFeature(LOAD_EXTERNAL_DTD, false); // Schema validation is always required now. So turn it on. getPolicySchema(); dbf.setNamespaceAware(true); dbf.setSchema(schema); DocumentBuilder db = dbf.newDocumentBuilder(); db.setErrorHandler(new SAXErrorHandler()); Document dom = db.parse(source); return dom.getDocumentElement(); } private static void parsePolicy(Element topLevelElement, ParseContext parseContext) throws PolicyException { if (topLevelElement == null) return; parseContext.resetParamsWhereLastConfigWins(); parseCommonRegExps( getFirstChild(topLevelElement, "common-regexps"), parseContext.commonRegularExpressions); parseDirectives(getFirstChild(topLevelElement, "directives"), parseContext.directives); parseCommonAttributes( getFirstChild(topLevelElement, "common-attributes"), parseContext.commonAttributes, parseContext.commonRegularExpressions); parseGlobalAttributes( getFirstChild(topLevelElement, "global-tag-attributes"), parseContext.globalAttributes, parseContext.commonAttributes); parseDynamicAttributes( getFirstChild(topLevelElement, "dynamic-tag-attributes"), parseContext.dynamicAttributes, parseContext.commonAttributes); parseTagRules( getFirstChild(topLevelElement, "tag-rules"), parseContext.commonAttributes, parseContext.commonRegularExpressions, parseContext.tagRules); parseCSSRules( getFirstChild(topLevelElement, "css-rules"), parseContext.cssRules, parseContext.commonRegularExpressions); parseAllowedEmptyTags( getFirstChild(topLevelElement, "allowed-empty-tags"), parseContext.allowedEmptyTags); parseRequireClosingTags( getFirstChild(topLevelElement, "require-closing-tags"), parseContext.requireClosingTags); } /** Returns the top level element of a loaded policy Document */ @SuppressFBWarnings( value = "SECURITY", justification = "Opening a stream to the provided URL is not " + "a vulnerability 
because only local file URLs are allowed.") private static Element getPolicy(String href, URL baseUrl) throws PolicyException { // Track whether an exception was ever thrown while processing policy file try { return getDocumentElementByUrl(href, baseUrl); } catch (SAXException | ParserConfigurationException | IOException e) { throw new PolicyException(e); } } // TODO: Add JavaDocs for this new method. @SuppressFBWarnings( value = "SECURITY", justification = "Opening a stream to the provided URL is not " + "a vulnerability because only local file URLs are allowed.") private static Element getDocumentElementByUrl(String href, URL baseUrl) throws IOException, ParserConfigurationException, SAXException { InputSource source = null; // Can't resolve public id, but might be able to resolve relative // system id, since we have a base URI. if (href != null && baseUrl != null) { verifyLocalUrl(baseUrl); URL url; try { url = new URL(baseUrl, href); logger.info("Attempting to load AntiSamy policy from URL: " + url.toString()); source = new InputSource(url.openStream()); source.setSystemId(href); } catch (MalformedURLException | FileNotFoundException e) { try { String absURL = URIUtils.resolveAsString(href, baseUrl.toString()); url = new URL(absURL); source = new InputSource(url.openStream()); source.setSystemId(href); } catch (MalformedURLException ex2) { // nothing to do // TODO: Is this true? Or should we at least log the original exception, or // rethrow it? } } } DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); /** Disable external entities, etc. */ dbf.setFeature(EXTERNAL_GENERAL_ENTITIES, false); dbf.setFeature(EXTERNAL_PARAM_ENTITIES, false); dbf.setFeature(DISALLOW_DOCTYPE_DECL, true); dbf.setFeature(LOAD_EXTERNAL_DTD, false); // This code doesn't have the retry logic if schema validation fails. It is up to the caller // to try again, // if this fails the first time (if they want to). 
getPolicySchema(); dbf.setNamespaceAware(true); dbf.setSchema(schema); DocumentBuilder db = dbf.newDocumentBuilder(); db.setErrorHandler(new SAXErrorHandler()); // Load and parse the file. if (source != null) { Document dom = db.parse(source); // Get the policy information out of it! return dom.getDocumentElement(); } return null; } /** * Creates a copy of this policy with an added/changed directive. * * @param name The directive to add/modify * @param value The value * @return A clone of the policy with the updated directive */ public Policy cloneWithDirective(String name, String value) { Map directives = new HashMap(this.directives); directives.put(name, value); return new InternalPolicy(this, Collections.unmodifiableMap(directives), tagRules, cssRules); } /** * Go through section of the policy file. * * @param root Top level of * @param directives The directives map to update */ private static void parseDirectives(Element root, Map directives) { for (Element ele : getByTagName(root, "directive")) { String name = getAttributeValue(ele, "name"); String value = getAttributeValue(ele, "value"); directives.put(name, value); } } /** * Go through section of the policy file. * * @param allowedEmptyTagsListNode Top level of * @param allowedEmptyTags The tags that can be empty */ private static void parseAllowedEmptyTags( Element allowedEmptyTagsListNode, List allowedEmptyTags) { if (allowedEmptyTagsListNode != null) { for (Element literalNode : getGrandChildrenByTagName(allowedEmptyTagsListNode, "literal-list", "literal")) { String value = getAttributeValue(literalNode, "value"); if (value != null && value.length() > 0) { allowedEmptyTags.add(value); } } } else allowedEmptyTags.addAll(Constants.defaultAllowedEmptyTags); } /** * Go through section of the policy file. 
* * @param requireClosingTagsListNode Top level of * @param requireClosingTags The list of tags that require closing */ private static void parseRequireClosingTags( Element requireClosingTagsListNode, List requireClosingTags) { if (requireClosingTagsListNode != null) { for (Element literalNode : getGrandChildrenByTagName(requireClosingTagsListNode, "literal-list", "literal")) { String value = getAttributeValue(literalNode, "value"); if (value != null && value.length() > 0) { requireClosingTags.add(value); } } } else requireClosingTags.addAll(Constants.defaultRequireClosingTags); } /** * Go through section of the policy file. * * @param root Top level of * @param globalAttributes1 A HashMap of global Attributes that need validation for every tag. * @param commonAttributes The common attributes * @throws PolicyException */ private static void parseGlobalAttributes( Element root, Map globalAttributes1, Map commonAttributes) throws PolicyException { for (Element ele : getByTagName(root, "attribute")) { String name = getAttributeValue(ele, "name"); Attribute toAdd = commonAttributes.get(name.toLowerCase()); if (toAdd != null) globalAttributes1.put(name.toLowerCase(), toAdd); else throw new PolicyException( "Global attribute '" + name + "' was not defined in "); } } /** * Go through section of the policy file. * * @param root Top level of * @param dynamicAttributes A HashMap of dynamic Attributes that need validation for every tag. 
* @param commonAttributes The common attributes
   * @throws PolicyException If a listed attribute is not defined in &lt;common-attributes&gt;
   */
  private static void parseDynamicAttributes(
      Element root, Map<String, Attribute> dynamicAttributes, Map<String, Attribute> commonAttributes)
      throws PolicyException {
    for (Element ele : getByTagName(root, "attribute")) {

      String name = getAttributeValue(ele, "name");

      Attribute toAdd = commonAttributes.get(name.toLowerCase());

      if (toAdd != null) {
        // The last character of the configured name is dropped, so the map is keyed by the
        // remaining prefix that getDynamicAttributeByName() matches with startsWith().
        String attrName = name.toLowerCase().substring(0, name.length() - 1);
        dynamicAttributes.put(attrName, toAdd);
      } else {
        throw new PolicyException(
            "Dynamic attribute '" + name + "' was not defined in <common-attributes>");
      }
    }
  }

  /**
   * Go through the &lt;common-regexps&gt; section of the policy file.
   *
   * @param root Top level of &lt;common-regexps&gt;
   * @param commonRegularExpressions1 the antisamy pattern objects
   */
  private static void parseCommonRegExps(
      Element root, Map<String, AntiSamyPattern> commonRegularExpressions1) {
    for (Element ele : getByTagName(root, "regexp")) {

      String name = getAttributeValue(ele, "name");
      Pattern pattern = Pattern.compile(getAttributeValue(ele, "value"), Pattern.DOTALL);

      commonRegularExpressions1.put(name, new AntiSamyPattern(pattern));
    }
  }

  /**
   * Go through the &lt;common-attributes&gt; section of the policy file.
   *
   * @param root Top level of &lt;common-attributes&gt;
   * @param commonAttributes1 The map of common attributes to update, keyed by lower-case name.
   * @param commonRegularExpressions1 The common regular expressions that attributes may reference.
   * @throws PolicyException If an attribute references a common regexp that is not defined.
   */
  private static void parseCommonAttributes(
      Element root,
      Map<String, Attribute> commonAttributes1,
      Map<String, AntiSamyPattern> commonRegularExpressions1)
      throws PolicyException {
    for (Element ele : getByTagName(root, "attribute")) {

      String onInvalid = getAttributeValue(ele, "onInvalid");
      String name = getAttributeValue(ele, "name");

      List<Pattern> allowedRegexps = getAllowedRegexps(commonRegularExpressions1, ele);
      List<String> allowedValues = getAllowedLiterals(ele);

      final String onInvalidStr;
      if (onInvalid != null && onInvalid.length() > 0) {
        onInvalidStr = onInvalid;
      } else {
        onInvalidStr = DEFAULT_ONINVALID;
      }
      String description = getAttributeValue(ele, "description");
      Attribute attribute =
          new Attribute(
              getAttributeValue(ele, "name"),
              allowedRegexps,
              allowedValues,
              onInvalidStr,
              description);

      commonAttributes1.put(name.toLowerCase(), attribute);
    }
  }

  /**
   * Collects the literal values listed in the literal-list of an element.
   *
   * @param ele The element whose literal-list is read.
   * @return The allowed literal values, possibly empty.
   */
  private static List<String> getAllowedLiterals(Element ele) {
    List<String> allowedValues = new ArrayList<String>();
    for (Element literalNode : getGrandChildrenByTagName(ele, "literal-list", "literal")) {
      String value = getAttributeValue(literalNode, "value");

      if (value != null && value.length() > 0) {
        allowedValues.add(value);
      } else if (literalNode.getNodeValue() != null) {
        allowedValues.add(literalNode.getNodeValue());
      }
    }
    return allowedValues;
  }

  /**
   * Collects the regular expressions listed in the regexp-list of a common attribute.
   *
   * @param commonRegularExpressions1 The common regular expressions that may be referenced by name.
   * @param ele The attribute element whose regexp-list is read.
   * @return The allowed patterns, possibly empty.
   * @throws PolicyException If a regexp is referenced by a name that is not defined. (An undefined
   *     name used to surface as a NullPointerException here, unlike in the sibling methods.)
   */
  private static List<Pattern> getAllowedRegexps(
      Map<String, AntiSamyPattern> commonRegularExpressions1, Element ele) throws PolicyException {
    List<Pattern> allowedRegExp = new ArrayList<Pattern>();
    for (Element regExpNode : getGrandChildrenByTagName(ele, "regexp-list", "regexp")) {
      String regExpName = getAttributeValue(regExpNode, "name");
      String value = getAttributeValue(regExpNode, "value");

      if (regExpName != null && regExpName.length() > 0) {
        AntiSamyPattern pattern = commonRegularExpressions1.get(regExpName);
        if (pattern == null) {
          throw new PolicyException(
              "Regular expression '"
                  + regExpName
                  + "' was referenced as a common regexp in definition of '"
                  + getAttributeValue(ele, "name")
                  + "', but does not exist in <common-regexps>");
        }
        allowedRegExp.add(pattern.getPattern());
      } else {
        allowedRegExp.add(Pattern.compile(value, Pattern.DOTALL));
      }
    }
    return allowedRegExp;
  }

  /**
   * Collects the regular expressions listed in the regexp-list of a tag-level attribute.
   *
   * @param commonRegularExpressions1 The common regular expressions that may be referenced by name.
   * @param attributeNode The attribute element whose regexp-list is read.
   * @param tagName The name of the enclosing tag, used for error reporting only.
   * @return The allowed patterns, possibly empty.
   * @throws PolicyException If a regexp is referenced by a name that is not defined.
   */
  private static List<Pattern> getAllowedRegexps2(
      Map<String, AntiSamyPattern> commonRegularExpressions1, Element attributeNode, String tagName)
      throws PolicyException {
    List<Pattern> allowedRegexps = new ArrayList<Pattern>();
    for (Element regExpNode : getGrandChildrenByTagName(attributeNode, "regexp-list", "regexp")) {
      String regExpName = getAttributeValue(regExpNode, "name");
      String value = getAttributeValue(regExpNode, "value");

      /*
       * Look up common regular expression specified
       * by the "name" field. They can put a common
       * name in the "name" field or provide a custom
       * value in the "value" field. They must choose
       * one or the other, not both.
       */
      if (regExpName != null && regExpName.length() > 0) {
        AntiSamyPattern pattern = commonRegularExpressions1.get(regExpName);
        if (pattern != null) {
          allowedRegexps.add(pattern.getPattern());
        } else {
          throw new PolicyException(
              "Regular expression '"
                  + regExpName
                  + "' was referenced as a common regexp in definition of '"
                  + tagName
                  + "', but does not exist in <common-regexps>");
        }
      } else if (value != null && value.length() > 0) {
        allowedRegexps.add(Pattern.compile(value, Pattern.DOTALL));
      }
    }
    return allowedRegexps;
  }

  /**
   * Collects the regular expressions listed in the regexp-list of a CSS property.
   *
   * @param commonRegularExpressions1 The common regular expressions that may be referenced by name.
   * @param ele The property element whose regexp-list is read.
   * @param name The name of the property, used for error reporting only.
   * @return The allowed patterns, possibly empty.
   * @throws PolicyException If an entry neither names a defined common regexp nor has a value.
   */
  private static List<Pattern> getAllowedRegexp3(
      Map<String, AntiSamyPattern> commonRegularExpressions1, Element ele, String name)
      throws PolicyException {

    List<Pattern> allowedRegExp = new ArrayList<Pattern>();
    for (Element regExpNode : getGrandChildrenByTagName(ele, "regexp-list", "regexp")) {
      String regExpName = getAttributeValue(regExpNode, "name");
      String value = getAttributeValue(regExpNode, "value");

      AntiSamyPattern pattern = commonRegularExpressions1.get(regExpName);

      if (pattern != null) {
        allowedRegExp.add(pattern.getPattern());
      } else if (value != null) {
        allowedRegExp.add(Pattern.compile(value, Pattern.DOTALL));
      } else {
        throw new PolicyException(
            "Regular expression '"
                + regExpName
                + "' was referenced as a common regexp in definition of '"
                + name
                + "', but does not exist in <common-regexps>");
      }
    }
    return allowedRegExp;
  }

  /**
   * Go through the &lt;tag-rules&gt; section of the policy file.
   *
   * @param root Top level of &lt;tag-rules&gt;, may be null.
   * @param commonAttributes1 The common attributes that tags may reference by name.
   * @param commonRegularExpressions1 The common regular expressions that may be referenced by name.
   * @param tagRules1 The map of tag rules to update, keyed by lower-case tag name.
   * @throws PolicyException If a tag references an undefined common attribute or regexp.
   */
  private static void parseTagRules(
      Element root,
      Map<String, Attribute> commonAttributes1,
      Map<String, AntiSamyPattern> commonRegularExpressions1,
      Map<String, Tag> tagRules1)
      throws PolicyException {

    if (root == null) {
      return;
    }

    for (Element tagNode : getByTagName(root, "tag")) {

      String name = getAttributeValue(tagNode, "name");
      String action = getAttributeValue(tagNode, "action");

      NodeList attributeList = tagNode.getElementsByTagName("attribute");
      Map<String, Attribute> tagAttributes =
          getTagAttributes(commonAttributes1, commonRegularExpressions1, attributeList, name);
      Tag tag = new Tag(name, tagAttributes, action);

      tagRules1.put(name.toLowerCase(), tag);
    }
  }

  /**
   * Builds the attribute map of a single tag.
   *
   * @param commonAttributes1 The common attributes that may be referenced by name.
   * @param commonRegularExpressions1 The common regular expressions that may be referenced by name.
   * @param attributeList The attribute elements found below the tag element.
   * @param tagName The name of the tag, used for error reporting only.
   * @return The attributes of the tag, keyed by lower-case attribute name.
   * @throws PolicyException If an undefined common attribute or regexp is referenced.
   */
  private static Map<String, Attribute> getTagAttributes(
      Map<String, Attribute> commonAttributes1,
      Map<String, AntiSamyPattern> commonRegularExpressions1,
      NodeList attributeList,
      String tagName)
      throws PolicyException {

    Map<String, Attribute> tagAttributes = new HashMap<String, Attribute>();
    for (int j = 0; j < attributeList.getLength(); j++) {

      Element attributeNode = (Element) attributeList.item(j);

      String attrName = getAttributeValue(attributeNode, "name").toLowerCase();
      if (!attributeNode.hasChildNodes()) {

        Attribute attribute = commonAttributes1.get(attrName);

        // All they provided was the name, so they must want a common attribute.
        if (attribute != null) {

          /*
           * If they provide onInvalid/description values here they will
           * override the common values.
           */
          String onInvalid = getAttributeValue(attributeNode, "onInvalid");
          String description = getAttributeValue(attributeNode, "description");

          Attribute changed = attribute.mutate(onInvalid, description);
          // NOTE(review): the mutated attribute is written back into the shared common-attribute
          // map, so it is what later tags referencing the same name will pick up - confirm that
          // this is intended before changing it.
          commonAttributes1.put(attrName, changed);

          tagAttributes.put(attrName, changed);

        } else {
          throw new PolicyException(
              "Attribute '"
                  + getAttributeValue(attributeNode, "name")
                  + "' was referenced as a common attribute in definition of '"
                  + tagName
                  + "', but does not exist in <common-attributes>");
        }

      } else {
        List<Pattern> allowedRegexps2 =
            getAllowedRegexps2(commonRegularExpressions1, attributeNode, tagName);
        List<String> allowedValues2 = getAllowedLiterals(attributeNode);
        String onInvalid = getAttributeValue(attributeNode, "onInvalid");
        String description = getAttributeValue(attributeNode, "description");
        Attribute attribute =
            new Attribute(
                getAttributeValue(attributeNode, "name"),
                allowedRegexps2,
                allowedValues2,
                onInvalid,
                description);

        // Add fully built attribute.
        tagAttributes.put(attrName, attribute);
      }
    }
    return tagAttributes;
  }

  /**
   * Go through the &lt;css-rules&gt; section of the policy file.
   *
   * @param root Top level of &lt;css-rules&gt;
   * @param cssRules1 The map of CSS properties to update, keyed by lower-case property name.
   * @param commonRegularExpressions1 The common regular expressions that may be referenced by name.
   * @throws PolicyException If a property lists a regexp that is neither defined nor given a value.
   */
  private static void parseCSSRules(
      Element root,
      Map<String, Property> cssRules1,
      Map<String, AntiSamyPattern> commonRegularExpressions1)
      throws PolicyException {

    for (Element ele : getByTagName(root, "property")) {

      String name = getAttributeValue(ele, "name");
      String description = getAttributeValue(ele, "description");

      List<Pattern> allowedRegexp3 = getAllowedRegexp3(commonRegularExpressions1, ele, name);

      List<String> allowedValue = new ArrayList<String>();
      for (Element literalNode : getGrandChildrenByTagName(ele, "literal-list", "literal")) {
        allowedValue.add(getAttributeValue(literalNode, "value"));
      }

      List<String> shortHandRefs = new ArrayList<String>();
      for (Element shorthandNode :
          getGrandChildrenByTagName(ele, "shorthand-list", "shorthand")) {
        shortHandRefs.add(getAttributeValue(shorthandNode, "name"));
      }

      String onInvalid = getAttributeValue(ele, "onInvalid");
      final String onInvalidStr;
      if (onInvalid != null && onInvalid.length() > 0) {
        onInvalidStr = onInvalid;
      } else {
        onInvalidStr = DEFAULT_ONINVALID;
      }
      Property property =
          new Property(
              name, allowedRegexp3, allowedValue, shortHandRefs, description, onInvalidStr);

      cssRules1.put(name.toLowerCase(), property);
    }
  }

  /**
   * A simple method for returning one of the &lt;global-attribute&gt; entries by name.
   *
   * @param name The name of the global-attribute we want to look up. It is lower-cased before the
   *     lookup.
   * @return An Attribute associated with the global-attribute lookup name specified, or null if
   *     none is found.
   */
  public Attribute getGlobalAttributeByName(String name) {
    return globalAttributes.get(name.toLowerCase());
  }

  /**
   * A method for returning one of the dynamic &lt;global-attribute&gt; entries by name.
   *
   * @param name The name of the dynamic global-attribute we want to look up.
   * @return An Attribute associated with the global-attribute lookup name specified, or null if not
   *     found.
*/
  public Attribute getDynamicAttributeByName(String name) {
    // The keys of the dynamic attribute map are prefixes; the first key that the requested name
    // starts with decides the result.
    for (Map.Entry<String, Attribute> candidate : dynamicAttributes.entrySet()) {
      if (name.startsWith(candidate.getKey())) {
        return candidate.getValue();
      }
    }
    return null;
  }

  /**
   * Return the matcher for the allowed empty tags configured in the Policy.
   *
   * @return A TagMatcher built from the allowed empty tags configured in the Policy.
   */
  public TagMatcher getAllowedEmptyTags() {
    return allowedEmptyTagsMatcher;
  }

  /**
   * Return the matcher for the tags that are required to be closed with an end tag, even if they
   * have no child content.
   *
   * @return A TagMatcher built from the tags that are required to be closed with an end tag, even
   *     if they have no child content.
   */
  public TagMatcher getRequiresClosingTags() {
    return requiresClosingTagsMatcher;
  }

  /**
   * Return a directive value based on a lookup name.
   *
   * @param name The name of the directive we want to look up.
   * @return A String object containing the directive associated with the lookup name, or null if
   *     none is found.
   */
  public String getDirective(String name) {
    return directives.get(name);
  }

  /**
   * Resolves public and system IDs to files stored within the JAR.
   *
   * @param systemId The name of the entity we want to look up.
   * @param baseUrl The base location of the entity.
   * @return An InputSource opened on the resolved entity, or null if it could not be resolved.
   * @throws IOException if the specified URL can't be opened.
   * @throws SAXException This exception can't actually be thrown, but left in the method signature
   *     for API compatibility reasons.
*/ @SuppressFBWarnings( value = "SECURITY", justification = "Opening a stream to the provided URL is not " + "a vulnerability because only local file URLs are allowed.") public static InputSource resolveEntity(final String systemId, URL baseUrl) throws IOException, SAXException { InputSource source; // Can't resolve public id, but might be able to resolve relative // system id, since we have a base URI. if (systemId != null && baseUrl != null) { verifyLocalUrl(baseUrl); URL url; try { url = new URL(baseUrl, systemId); source = new InputSource(url.openStream()); source.setSystemId(systemId); return source; } catch (MalformedURLException | FileNotFoundException e) { try { String absURL = URIUtils.resolveAsString(systemId, baseUrl.toString()); url = new URL(absURL); source = new InputSource(url.openStream()); source.setSystemId(systemId); return source; } catch (MalformedURLException ex2) { // nothing to do } } return null; } // No resolving. return null; } /** * Verify that the target of the URL is a local file only. Currently, we allow file: and jar: * URLs. The target of the URL is typically an AntiSamy policy file. * * @param url The URL to verify. * @throws MalformedURLException If the supplied URL does not reference a local file directly, or * one inside a local JAR file. */ private static void verifyLocalUrl(URL url) throws MalformedURLException { switch (url.getProtocol()) { case "file": case "jar": break; // These are OK. default: throw new MalformedURLException( "Only local files can be accessed with a policy URL. 
Illegal value supplied was: " + url); } } private static Element getFirstChild(Element element, String tagName) { if (element == null) return null; NodeList elementsByTagName = element.getElementsByTagName(tagName); if (elementsByTagName != null && elementsByTagName.getLength() > 0) return (Element) elementsByTagName.item(0); else return null; } private static Iterable getGrandChildrenByTagName( Element parent, String immediateChildName, String subChild) { NodeList elementsByTagName = parent.getElementsByTagName(immediateChildName); if (elementsByTagName.getLength() == 0) return Collections.emptyList(); Element regExpListNode = (Element) elementsByTagName.item(0); return getByTagName(regExpListNode, subChild); } private static Iterable getByTagName(Element parent, String tagName) { if (parent == null) return Collections.emptyList(); final NodeList nodes = parent.getElementsByTagName(tagName); return new Iterable() { public Iterator iterator() { return new Iterator() { int pos = 0; int len = nodes.getLength(); public boolean hasNext() { return pos < len; } public Element next() { return (Element) nodes.item(pos++); } public void remove() { throw new UnsupportedOperationException("Cant remove"); } }; } }; } public AntiSamyPattern getCommonRegularExpressions(String name) { return commonRegularExpressions.get(name); } private static void getPolicySchema() throws SAXException { if (schema == null) { InputStream schemaStream = Policy.class.getClassLoader().getResourceAsStream(POLICY_SCHEMA_URI); Source schemaSource = new StreamSource(schemaStream); schema = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(schemaSource); } } /** * This class is implemented to just throw an exception when validating the policy schema while * parsing the document. 
*/
  static class SAXErrorHandler implements ErrorHandler {

    /** Rethrows the recoverable error so that validation stops. */
    @Override
    public void error(SAXParseException exception) throws SAXException {
      throw exception;
    }

    /** Rethrows the fatal error. */
    @Override
    public void fatalError(SAXParseException exception) throws SAXException {
      throw exception;
    }

    /** Rethrows the warning; warnings are handled the same way as errors. */
    @Override
    public void warning(SAXParseException exception) throws SAXException {
      throw exception;
    }
  }
}
nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/PolicyException.java000066400000000000000000000037751451011227400304530ustar00rootroot00000000000000
/*
 * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.owasp.validator.html;

/**
 * This exception gets thrown when there is a problem validating or parsing the policy file. Any
 * validation errors not caught by the XML validation will be thrown with this exception.
 *
 * @author Arshan Dabirsiaghi
 */
public class PolicyException extends Exception {

  /** Serialization version identifier. */
  private static final long serialVersionUID = 1L;

  /**
   * Wraps an underlying failure.
   *
   * @param e the exception to record as the cause
   */
  public PolicyException(Exception e) {
    super(e);
  }

  /**
   * Creates an exception carrying only a message.
   *
   * @param string the detail message
   */
  public PolicyException(String string) {
    super(string);
  }
}
nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/ScanException.java000066400000000000000000000040051451011227400300630ustar00rootroot00000000000000
/*
 * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.html; /** * This exception gets thrown when there is an unexpected error parsing the tainted HTML. The code * is sturdy, but the unlikely IOException or SAX exceptions are always theoretically * possible. * * @author Arshan Dabirsiaghi */ public class ScanException extends Exception { /** */ private static final long serialVersionUID = 1L; public ScanException(Exception e) { super(e); } public ScanException(String s) { super(s); } } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/TagMatcher.java000066400000000000000000000045321451011227400273440ustar00rootroot00000000000000/* * Copyright (c) 2013, Kristian Rosenvold * * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.html; import java.util.HashSet; import java.util.Set; /** * Uses smart matching to match tags * * @author Kristian Rosenvold */ public class TagMatcher { private final Set allowedLowercase = new HashSet(); public TagMatcher(Iterable allowedValues) { for (String item : allowedValues) { allowedLowercase.add(item.toLowerCase()); } } /** * Examines if this tag matches the values in this matcher. * *

Please note that this is case-insensitive, which is OK for HTML, but not really for XML * * @param tagName The tag name to look for * @return true if the tag name matches this matcher */ public boolean matches(String tagName) { return allowedLowercase.contains(tagName.toLowerCase()); } public int size() { return allowedLowercase.size(); } } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/model/000077500000000000000000000000001451011227400255565ustar00rootroot00000000000000nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/model/AntiSamyPattern.java000066400000000000000000000044021451011227400315040ustar00rootroot00000000000000/* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.html.model; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * An extension of the Pattern with helper methods. * * @author Arshan Dabirsiaghi */ public class AntiSamyPattern { private final Pattern pattern; /** * Constructor for AntiSamyPattern. * * @param pattern The Pattern to lookup based on the "name". */ public AntiSamyPattern(Pattern pattern) { this.pattern = pattern; } /** @return Return the Pattern of the AntiSamyPattern. */ public Pattern getPattern() { return pattern; } public Matcher matcher(CharSequence input) { return pattern.matcher(input); } public boolean matches(String other) { return matcher(other).matches(); } } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/model/Attribute.java000066400000000000000000000164511451011227400303730ustar00rootroot00000000000000/* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li, Kristian Rosenvold * * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.html.model; import static org.owasp.validator.html.model.Tag.ANY_NORMAL_WHITESPACES; import static org.owasp.validator.html.model.Tag.ATTRIBUTE_DIVIDER; import static org.owasp.validator.html.model.Tag.CLOSE_ATTRIBUTE; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.regex.Pattern; /** * A model for HTML attributes and the "rules" they must follow (either literals or regular * expressions) in order to be considered valid. 
* * @author Arshan Dabirsiaghi * @author Kristian Rosenvold */ public class Attribute { private final String name; private final String description; private final String onInvalid; private final List allowedValues; private final Pattern[] allowedRegExps; private final Set allowedValuesLower; public Attribute( String name, List allowedRegexps, List allowedValues, String onInvalidStr, String description) { this.name = name; this.allowedRegExps = allowedRegexps.toArray(new Pattern[allowedRegexps.size()]); this.allowedValues = Collections.unmodifiableList(allowedValues); Set allowedValuesLower = new HashSet(); for (String allowedValue : allowedValues) { allowedValuesLower.add(allowedValue.toLowerCase()); } this.allowedValuesLower = allowedValuesLower; this.onInvalid = onInvalidStr; this.description = description; } public boolean matchesAllowedExpression(String value) { String input = value.toLowerCase(); for (Pattern pattern : allowedRegExps) { if (pattern != null && pattern.matcher(input).matches()) { return true; } } return false; } public boolean containsAllowedValue(String valueInLowerCase) { return allowedValuesLower.contains(valueInLowerCase); } public String getName() { return name; } /** * @return The onInvalid value a tag could have, from the list of "filterTag", * "removeTag" and "removeAttribute" */ public String getOnInvalid() { return onInvalid; } public Attribute mutate(String onInvalid, String description) { return new Attribute( name, Arrays.asList(allowedRegExps), allowedValues, onInvalid != null && onInvalid.length() != 0 ? onInvalid : this.onInvalid, description != null && description.length() != 0 ? description : this.description); } public String matcherRegEx(boolean hasNext) { //

StringBuilder regExp = new StringBuilder(); regExp .append(this.getName()) .append(ANY_NORMAL_WHITESPACES) .append("=") .append(ANY_NORMAL_WHITESPACES) .append("\"") .append(Tag.OPEN_ATTRIBUTE); boolean hasRegExps = allowedRegExps.length > 0; if (allowedRegExps.length + allowedValues.size() > 0) { /* * Go through and add static values to the regular expression. */ Iterator allowedValues = this.allowedValues.iterator(); while (allowedValues.hasNext()) { String allowedValue = allowedValues.next(); regExp.append(Tag.escapeRegularExpressionCharacters(allowedValue)); if (allowedValues.hasNext() || hasRegExps) { regExp.append(ATTRIBUTE_DIVIDER); } } /* * Add the regular expressions for this attribute value to the mother regular expression. */ Iterator allowedRegExps = Arrays.asList(this.allowedRegExps).iterator(); while (allowedRegExps.hasNext()) { Pattern allowedRegExp = allowedRegExps.next(); regExp.append(allowedRegExp.pattern()); if (allowedRegExps.hasNext()) { regExp.append(ATTRIBUTE_DIVIDER); } } if (this.allowedRegExps.length + this.allowedValues.size() > 0) { regExp.append(CLOSE_ATTRIBUTE); } regExp.append("\"" + ANY_NORMAL_WHITESPACES); if (hasNext) { regExp.append(ATTRIBUTE_DIVIDER); } } return regExp.toString(); } /** * This method takes the current rel attribute values and, depending on which ones to * add, appends the corresponding values if they are not already present. It is meant to be used * with anchor tags. * * @param addNofollow Specifies if "nofollow" value should be added in case it is not * present. * @param addNoopenerAndNoreferrer Specifies if "noopener noreferrer" value should be * added in case it is not present. * @param currentRelValue Current rel attribute value, it will be merged with the * values specified from the previous parameters. * @return The new rel attribute value to replace in an anchor tag. 
*/ public static String mergeRelValuesInAnchor( boolean addNofollow, boolean addNoopenerAndNoreferrer, String currentRelValue) { String newRelValue = ""; if (currentRelValue == null || currentRelValue.isEmpty()) { if (addNofollow) newRelValue = "nofollow"; if (addNoopenerAndNoreferrer) newRelValue += " noopener noreferrer"; } else { ArrayList relTokens = new ArrayList<>(); newRelValue = currentRelValue; for (String value : currentRelValue.split(" ")) { relTokens.add(value.toLowerCase()); } if (addNofollow && !relTokens.contains("nofollow")) { newRelValue += " nofollow"; } if (addNoopenerAndNoreferrer) { if (!relTokens.contains("noopener")) { newRelValue += " noopener"; } if (!relTokens.contains("noreferrer")) { newRelValue += " noreferrer"; } } } return newRelValue.trim(); } } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/model/Property.java000066400000000000000000000062671451011227400302600ustar00rootroot00000000000000/* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of OWASP nor the names of its contributors may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.html.model; import java.util.Collections; import java.util.List; import java.util.regex.Pattern; /** * A model for CSS properties and the "rules" they must follow (either literals or regular * expressions) in order to be considered valid. * * @author Jason Li */ public class Property { private final String name; private final List allowedRegExp; private final List allowedValues; private final List shorthandRefs; public Property( String name, List allowedRegexp3, List allowedValue, List shortHandRefs, String description, String onInvalidStr) { this.name = name; this.allowedRegExp = Collections.unmodifiableList(allowedRegexp3); this.allowedValues = Collections.unmodifiableList(allowedValue); this.shorthandRefs = Collections.unmodifiableList(shortHandRefs); } /** * Return a List of allowed regular expressions * * @return The List of allowed regular expressions. */ public List getAllowedRegExp() { return allowedRegExp; } /** * Return a List of allowed literal values * * @return The List of allowed literal values. */ public List getAllowedValues() { return allowedValues; } /** * Return a List of allowed shorthand references * * @return The List of allowed shorthand references. */ public List getShorthandRefs() { return shorthandRefs; } /** * Get the name of the property. * * @return The name of the property. 
*/ public String getName() { return name; } } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/model/Tag.java000066400000000000000000000126661451011227400271470ustar00rootroot00000000000000/* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.owasp.validator.html.model; import java.util.*; /** * A model for HTML "tags" and the rules dictating their validation/filtration. Also contains * information about their allowed attributes.
*
* There is also some experimental (unused) code in here for generating a valid regular expression * according to a policy file on a per-tag basis. * * @author Arshan Dabirsiaghi */ public class Tag { /* * These are the fields pulled from the policy XML. */ private final Map allowedAttributes; private final String name; private final String action; public Tag(String name, Map tagAttributes, String action) { this.name = name; this.allowedAttributes = Collections.unmodifiableMap(tagAttributes); this.action = action; } /** * @return The action for this tag which is one of filter, validate or * remove. */ public String getAction() { return action; } /** * Indicates if the action for this tag matches the supplied action * * @param action The action to match against * @return True if it matches */ public boolean isAction(String action) { return action.equals(this.action); } public Tag mutateAction(String action) { return new Tag(this.name, this.allowedAttributes, action); } /* --------------------------------------------------------------------------------------------------*/ /** * Returns a regular expression for validating individual tags. Not used by the AntiSamy scanner, * but you might find some use for this. 
* * @return A regular expression for the tag, i.e., "^<b>$" or * "<hr(\s)*(width='((\w){2,3}(\%)*)'>" */ public String getRegularExpression() { /* * For such tags as , , */ if (allowedAttributes.size() == 0) { return "^<" + name + ">$"; } StringBuilder regExp = new StringBuilder("<" + ANY_NORMAL_WHITESPACES + name + OPEN_TAG_ATTRIBUTES); List values = new ArrayList(allowedAttributes.values()); Collections.sort( values, new Comparator() { public int compare(Attribute o1, Attribute o2) { return o1.getName().compareTo(o2.getName()); } }); Iterator attributes = values.iterator(); while (attributes.hasNext()) { Attribute attr = attributes.next(); regExp.append(attr.matcherRegEx(attributes.hasNext())); } regExp.append(CLOSE_TAG_ATTRIBUTES + ANY_NORMAL_WHITESPACES + ">"); return regExp.toString(); } static String escapeRegularExpressionCharacters(String allowedValue) { String toReturn = allowedValue; if (toReturn == null) { return null; } for (int i = 0; i < REGEXP_CHARACTERS.length(); i++) { toReturn = toReturn.replaceAll( "\\" + String.valueOf(REGEXP_CHARACTERS.charAt(i)), "\\" + REGEXP_CHARACTERS.charAt(i)); } return toReturn; } /** Begin Variables Needed For Generating Regular Expressions * */ static final String ANY_NORMAL_WHITESPACES = "(\\s)*"; static final String OPEN_ATTRIBUTE = "("; static final String ATTRIBUTE_DIVIDER = "|"; static final String CLOSE_ATTRIBUTE = ")"; private static final String OPEN_TAG_ATTRIBUTES = ANY_NORMAL_WHITESPACES + OPEN_ATTRIBUTE; private static final String CLOSE_TAG_ATTRIBUTES = ")*"; private static final String REGEXP_CHARACTERS = "\\(){}.*?$^-+"; /** @return The String name of the tag. */ public String getName() { return name; } /** * Returns an Attribute associated with a lookup name. * * @param name The name of the allowed attribute by name. 
* @return The Attribute object associated with the name, or */ public Attribute getAttributeByName(String name) { return allowedAttributes.get(name); } } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/model/package.html000066400000000000000000000010571451011227400300420ustar00rootroot00000000000000 This package contains implementation classes used by AntiSamy and should not be directly used by clients. Whenever we do a Java 9 (or later release) of AntiSamy we plan to package this into a Java Module so it is not publicly accessible. As such, AntiSamy users should not invoke anything in this package directly. nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/package.html000066400000000000000000000004751451011227400267450ustar00rootroot00000000000000 This package contains classes to handle the sanitization of HTML markup. nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/scan/000077500000000000000000000000001451011227400254025ustar00rootroot00000000000000nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java000066400000000000000000000057171451011227400313010ustar00rootroot00000000000000package org.owasp.validator.html.scan; import java.io.IOException; import java.io.Writer; import org.apache.xml.serialize.ElementState; import org.apache.xml.serialize.HTMLdtd; import org.apache.xml.serialize.OutputFormat; import org.owasp.validator.html.InternalPolicy; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @SuppressWarnings("deprecation") public class ASHTMLSerializer extends org.apache.xml.serialize.HTMLSerializer { private static final Logger logger = LoggerFactory.getLogger(ASHTMLSerializer.class); private boolean encodeAllPossibleEntities; public ASHTMLSerializer(Writer w, OutputFormat format, InternalPolicy policy) { super(w, format); this.encodeAllPossibleEntities = policy.isEntityEncodeIntlCharacters(); } protected String getEntityRef(int charToPrint) { if (encodeAllPossibleEntities 
|| Constants.big5CharsToEncode.indexOf(charToPrint) != -1) return super.getEntityRef(charToPrint); return null; } public void endElementIO(String namespaceURI, String localName, String rawName) throws IOException { ElementState state; // Works much like content() with additions for closing an element. Note the different checks // for the closed element's state and the parent element's state. _printer.unindent(); state = getElementState(); if (state.empty) _printer.printText('>'); // This element is not empty and that last content was another element, so print a line break // before that last element and this element's closing tag. [keith] Provided this is not an // anchor. HTML: some elements do not print closing tag (e.g. LI) if (rawName == null || !HTMLdtd.isOnlyOpening(rawName) || HTMLdtd.isOptionalClosing(rawName)) { if (_indenting && !state.preserveSpace && state.afterElement) _printer.breakLine(); // Must leave CData section first (Illegal in HTML, but still) if (state.inCData) _printer.printText("]]>"); _printer.printText("'); } // Leave the element state and update that of the parent (if we're not root) to not empty and // after element. state = leaveElementState(); // Temporary hack to prevent line breaks inside A/TD if (rawName == null || (!rawName.equalsIgnoreCase("A") && !rawName.equalsIgnoreCase("TD"))) state.afterElement = true; state.empty = false; if (isDocumentState()) _printer.flush(); } /* * The override is to use printEscaped() which already escapes entity references * and writes them in the final serialized string. As escapeURI() is called like * "printer.printText(escapeURI(value))", if the URI is returned here it would * be double-printed and that is why the return value is an empty string. 
*/ @Override protected String escapeURI(String uri) { try { printEscaped(uri); } catch (IOException e) { logger.error("URI escaping failed for value: " + uri); } return ""; } } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java000066400000000000000000000122111451011227400327650ustar00rootroot00000000000000/* * Copyright (c) 2007-2023, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are permitted * provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this list of conditions * and the following disclaimer. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the documentation and/or other * materials provided with the distribution. Neither the name of OWASP nor the names of its * contributors may be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ package org.owasp.validator.html.scan; import java.io.Writer; import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.MissingResourceException; import java.util.ResourceBundle; import org.apache.xml.serialize.OutputFormat; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.InternalPolicy; import org.owasp.validator.html.Policy; import org.owasp.validator.html.PolicyException; import org.owasp.validator.html.ScanException; import org.owasp.validator.html.util.ErrorMessageUtil; /** * This class defines the basic structure for each type of AntiSamy scanner. All the * scanning/filtration logic is to reside in each implementation of this class, but the * implementations should not be called directly. All scanning should be done through an * AntiSamy.scan() method invocation. * * @author Arshan Dabirsiaghi */ public abstract class AbstractAntiSamyScanner { protected final InternalPolicy policy; protected final List errorMessages = new ArrayList(); protected static final ResourceBundle messages = getResourceBundle(); protected final Locale locale = Locale.getDefault(); protected boolean isNofollowAnchors = false; protected boolean isNoopenerAndNoreferrerAnchors = false; protected boolean isValidateParamAsEmbed = false; public abstract CleanResults scan(String html) throws ScanException; public abstract CleanResults getResults(); /** * Construct an AntiSamy Scanner instance that uses the specified AntiSamy policy. * * @param policy The policy to use. */ public AbstractAntiSamyScanner(Policy policy) { assert policy instanceof InternalPolicy : policy.getClass(); this.policy = (InternalPolicy) policy; } /** * Construct an AntiSamy Scanner instance that uses the default AntiSamy policy file. * * @throws PolicyException thrown when there is a problem validating or parsing the policy file. * Any validation errors not caught by the XML validation will be thrown with this exception. 
*/ public AbstractAntiSamyScanner() throws PolicyException { policy = (InternalPolicy) Policy.getInstance(); } private static ResourceBundle getResourceBundle() { try { return ResourceBundle.getBundle("AntiSamy", Locale.getDefault()); } catch (MissingResourceException mre) { return ResourceBundle.getBundle( "AntiSamy", new Locale(Constants.DEFAULT_LOCALE_LANG, Constants.DEFAULT_LOCALE_LOC)); } } protected void addError(String errorKey, Object[] objs) { errorMessages.add(ErrorMessageUtil.getMessage(messages, errorKey, objs)); } protected OutputFormat getOutputFormat() { OutputFormat format = new OutputFormat(); format.setOmitXMLDeclaration(policy.isOmitXmlDeclaration()); format.setOmitDocumentType(policy.isOmitDoctypeDeclaration()); format.setPreserveEmptyAttributes(true); format.setPreserveSpace(policy.isPreserveSpace()); if (policy.isFormatOutput()) { format.setLineWidth(80); format.setIndenting(true); format.setIndent(2); } return format; } protected org.apache.xml.serialize.HTMLSerializer getHTMLSerializer( Writer w, OutputFormat format) { return new ASHTMLSerializer(w, format, policy); } protected String trim(String original, String cleaned) { if (cleaned.endsWith("\n")) { if (!original.endsWith("\n")) { if (cleaned.endsWith("\r\n")) { cleaned = cleaned.substring(0, cleaned.length() - 2); } else if (cleaned.endsWith("\n")) { cleaned = cleaned.substring(0, cleaned.length() - 1); } } } return cleaned; } } nahsra-antisamy-45c78f1/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java000066400000000000000000000731501451011227400316520ustar00rootroot00000000000000/* * Copyright (c) 2007-2023, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are permitted * provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this list of conditions * and the following disclaimer. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the documentation and/or other * materials provided with the distribution. Neither the name of OWASP nor the names of its * contributors may be used to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ package org.owasp.validator.html.scan; import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; import java.util.List; import java.util.Queue; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.batik.css.parser.ParseException; import org.htmlunit.cyberneko.parsers.DOMFragmentParser; import org.htmlunit.cyberneko.xerces.dom.DocumentImpl; import org.owasp.validator.css.CssScanner; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.Policy; import org.owasp.validator.html.PolicyException; import org.owasp.validator.html.ScanException; import org.owasp.validator.html.model.Attribute; import org.owasp.validator.html.model.Tag; import org.owasp.validator.html.util.ErrorMessageUtil; import org.owasp.validator.html.util.HTMLEntityEncoder; import org.w3c.dom.Comment; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.DocumentFragment; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.dom.ProcessingInstruction; import org.w3c.dom.Text; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXNotRecognizedException; import org.xml.sax.SAXNotSupportedException; /** * This is where the magic lives (all the HTML scanning/filtration logic resides here). This class * should not be called directly. All scanning should be done through an AntiSamy.scan() * method invocation. 
* @author Arshan Dabirsiaghi
 */
public class AntiSamyDOMScanner extends AbstractAntiSamyScanner {

  private Document document = new DocumentImpl();
  /** Fragment the parser writes into and the serializer reads from. */
  private DocumentFragment dom = document.createDocumentFragment();
  private CleanResults results = null;

  /** Nesting limit enforced while validating tags. */
  private static final int maxDepth = 250;

  private static final Pattern invalidXmlCharacters =
      Pattern.compile("[\\u0000-\\u001F\\uD800-\\uDFFF\\uFFFE-\\uFFFF&&[^\\u0009\\u000A\\u000D]]");

  // NOTE(review): the pattern text was lost (only "?" remained, which does not compile as a regex
  // and would fail class initialisation). Restored from the upstream project - confirm.
  private static final Pattern conditionalDirectives =
      Pattern.compile("<?!?\\[\\s*(?:end)?if[^]]*\\]>?");

  /** Items handed out by poll() at the start of a scan and added back when it succeeds. */
  private static final Queue<CachedItem> cachedItems = new ConcurrentLinkedQueue<>();

  /** Bundles a fragment parser with a reusable matcher for the invalid-XML-character pattern. */
  static class CachedItem {
    private final DOMFragmentParser parser;
    private final Matcher invalidXmlCharMatcher = invalidXmlCharacters.matcher("");

    CachedItem() throws SAXNotSupportedException, SAXNotRecognizedException {
      this.parser = getDomParser();
    }

    DOMFragmentParser getDomFragmentParser() {
      return parser;
    }
  }

  /**
   * Create an instance of this class configured to use the specified policy.
   *
   * @param policy The policy to use.
   */
  public AntiSamyDOMScanner(Policy policy) {
    super(policy);
  }

  /**
   * Create an instance of this class configured to use the default AntiSamy policy.
   *
   * @throws PolicyException thrown when there is a problem validating or parsing the policy file.
   *     Any validation errors not caught by the XML validation will be thrown with this exception.
   */
  public AntiSamyDOMScanner() throws PolicyException {
    super();
  }

  /**
   * This is where the magic lives.
   *
   * @param html A String whose contents is to be sanitized per the configured AntiSamy policy.
   * @return A CleanResults object with (possibly) an XMLDocumentFragment
   * object and a String representation of the cleaned HTML, as well as some scan statistics.
   * Note that ONLY the cleaned HTML can be considered trustworthy. The absence of errorMessages
   * in the CleanResults does NOT necessarily indicate the input was safe (i.e., contained no
   * attacks).
* @throws ScanException When there is a problem encountered while scanning the HTML input. */ @Override public CleanResults scan(String html) throws ScanException { if (html == null) { throw new ScanException(new NullPointerException("Null HTML input")); } errorMessages.clear(); int maxInputSize = policy.getMaxInputSize(); if (maxInputSize < html.length()) { addError(ErrorMessageUtil.ERROR_INPUT_SIZE, new Object[] {html.length(), maxInputSize}); throw new ScanException(errorMessages.get(0)); } isNofollowAnchors = policy.isNofollowAnchors(); isNoopenerAndNoreferrerAnchors = policy.isNoopenerAndNoreferrerAnchors(); isValidateParamAsEmbed = policy.isValidateParamAsEmbed(); long startOfScan = System.currentTimeMillis(); try { CachedItem cachedItem; cachedItem = cachedItems.poll(); if (cachedItem == null) { cachedItem = new CachedItem(); } /* * We have to replace any invalid XML characters to prevent NekoHTML * from breaking when it gets passed encodings like %21. */ html = stripNonValidXMLCharacters(html, cachedItem.invalidXmlCharMatcher); /* * First thing we do is call the HTML cleaner ("NekoHTML") on it * with the appropriate options. We choose not to omit tags due to * the fallibility of our own listing in the ever changing world of * W3C. */ DOMFragmentParser parser = cachedItem.getDomFragmentParser(); try { parser.parse(new InputSource(new StringReader(html)), dom); } catch (Exception e) { throw new ScanException(e); } processChildren(dom, 0); /* * Serialize the output and then return the resulting DOM object and * its string representation. */ final String trimmedHtml = html; StringWriter out = new StringWriter(); @SuppressWarnings("deprecation") org.apache.xml.serialize.OutputFormat format = getOutputFormat(); //noinspection deprecation org.apache.xml.serialize.HTMLSerializer serializer = getHTMLSerializer(out, format); serializer.serialize(dom); /* * Get the String out of the StringWriter and rip out the XML * declaration if the Policy says we should. 
*/ final String trimmed = trim(trimmedHtml, out.getBuffer().toString()); Callable cleanHtml = new Callable() { public String call() throws Exception { return trimmed; } }; /* * Return the DOM object as well as string HTML. */ results = new CleanResults(startOfScan, cleanHtml, dom, errorMessages); cachedItems.add(cachedItem); return results; } catch (SAXException | IOException e) { throw new ScanException(e); } } static DOMFragmentParser getDomParser() throws SAXNotRecognizedException, SAXNotSupportedException { DOMFragmentParser parser = new DOMFragmentParser(); parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); parser.setFeature("http://cyberneko.org/html/features/scanner/style/strip-cdata-delims", false); parser.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections", true); parser.setFeature("http://cyberneko.org/html/features/parse-noscript-content", false); return parser; } /** * The workhorse of the scanner. Recursively scans document elements according to the policy. This * should be called implicitly through the AntiSamy.scan() method. * * @param node The node to validate. 
*/ private void recursiveValidateTag(final Node node, int currentStackDepth) throws ScanException { currentStackDepth++; if (currentStackDepth > maxDepth) { throw new ScanException("Too many nested tags"); } if (node instanceof Comment) { processCommentNode(node); return; } boolean isElement = node instanceof Element; NodeList eleChildNodes = node.getChildNodes(); if (isElement && eleChildNodes.getLength() == 0) { if (removeDisallowedEmpty(node)) { return; } } if (node instanceof Text && Node.CDATA_SECTION_NODE == node.getNodeType()) { stripCData(node); return; } if (node instanceof ProcessingInstruction) { removePI(node); } if (!isElement) { return; } final Element ele = (Element) node; final Node parentNode = ele.getParentNode(); final String tagName = ele.getNodeName(); final String tagNameLowerCase = tagName.toLowerCase(); Tag tagRule = policy.getTagByLowercaseName(tagNameLowerCase); /* * If and no policy and isValidateParamAsEmbed and policy in * place for and policy is to validate, use custom * policy to get the tag through to the validator. 
*/ Tag embedTag = policy.getEmbedTag(); boolean masqueradingParam = isMasqueradingParam(tagRule, embedTag, tagNameLowerCase); if (masqueradingParam) { tagRule = Constants.BASIC_PARAM_TAG_RULE; } if ((tagRule == null && policy.isEncodeUnknownTag()) || (tagRule != null && tagRule.isAction(Policy.ACTION_ENCODE))) { encodeTag(currentStackDepth, ele, tagName, eleChildNodes); } else if (tagRule == null || tagRule.isAction(Policy.ACTION_FILTER)) { actionFilter(currentStackDepth, ele, tagName, tagRule, eleChildNodes); } else if (tagRule.isAction(Policy.ACTION_VALIDATE)) { actionValidate( currentStackDepth, ele, parentNode, tagName, tagNameLowerCase, tagRule, masqueradingParam, embedTag, eleChildNodes); } else if (tagRule.isAction(Policy.ACTION_TRUNCATE)) { actionTruncate(ele, tagName, eleChildNodes); } else { /* * If we reached this that means that the tag's action is "remove", * which means to remove the tag (including its contents). */ addError( ErrorMessageUtil.ERROR_TAG_DISALLOWED, new Object[] {HTMLEntityEncoder.htmlEntityEncode(tagName)}); removeNode(ele); } } private boolean isMasqueradingParam(Tag tagRule, Tag embedTag, String tagNameLowerCase) { if (tagRule == null && isValidateParamAsEmbed && "param".equals(tagNameLowerCase)) { return embedTag != null && embedTag.isAction(Policy.ACTION_VALIDATE); } return false; } private void encodeTag(int currentStackDepth, Element ele, String tagName, NodeList eleChildNodes) throws ScanException { addError( ErrorMessageUtil.ERROR_TAG_ENCODED, new Object[] {HTMLEntityEncoder.htmlEntityEncode(tagName)}); processChildren(eleChildNodes, currentStackDepth); /* * Transform the tag to text, HTML-encode it and promote the * children. The tag will be kept in the fragment as one or two text * Nodes located before and after the children; representing how the * tag used to wrap them. 
*/ encodeAndPromoteChildren(ele); }
/**
 * Handles an element whose tag is to be filtered. Records ERROR_TAG_NOT_IN_POLICY when
 * {@code tag} is null and ERROR_TAG_FILTERED otherwise (both with the entity-encoded tag name),
 * then runs processChildren() on the child nodes and finally calls promoteChildren() on the
 * element.
 */
private void actionFilter( int currentStackDepth, Element ele, String tagName, Tag tag, NodeList eleChildNodes) throws ScanException { if (tag == null) { addError( ErrorMessageUtil.ERROR_TAG_NOT_IN_POLICY, new Object[] {HTMLEntityEncoder.htmlEntityEncode(tagName)}); } else { addError( ErrorMessageUtil.ERROR_TAG_FILTERED, new Object[] {HTMLEntityEncoder.htmlEntityEncode(tagName)}); } processChildren(eleChildNodes, currentStackDepth); promoteChildren(ele); } private void actionValidate( int currentStackDepth, Element ele, Node parentNode, String tagName, String tagNameLowerCase, Tag tag, boolean masqueradingParam, Tag embedTag, NodeList eleChildNodes) throws ScanException { /* * If doing as , now is the time to convert it. */ String nameValue = null; if (masqueradingParam) { nameValue = ele.getAttribute("name"); if (nameValue != null && !"".equals(nameValue)) { String valueValue = ele.getAttribute("value"); ele.setAttribute(nameValue, valueValue); ele.removeAttribute("name"); ele.removeAttribute("value"); tag = embedTag; } } /* * Check to see if it's a ", policy, AntiSamy.DOM) .getCleanHTML() .contains("ha.ckers.org")); assertTrue( !as.scan("", policy, AntiSamy.SAX) .getCleanHTML() .contains("ha.ckers.org")); assertTrue( !as.scan( "", policy, AntiSamy.DOM) .getCleanHTML() .contains("ha.ckers.org")); assertTrue( !as.scan( "", policy, AntiSamy.SAX) .getCleanHTML() .contains("ha.ckers.org")); assertTrue( !as.scan( "