Skip to content

Commit 638f649

Browse files
committed
Removed lxml.etree.XMLParser from xml bomb sinks
1 parent 887d80f commit 638f649

File tree

3 files changed

+4
-20
lines changed

3 files changed

+4
-20
lines changed

python/ql/lib/semmle/python/frameworks/Lxml.qll

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -129,11 +129,6 @@ module Lxml {
129129
any(True t)
130130
)
131131
or
132-
kind.isXmlBomb() and
133-
this.getKeywordParameter("huge_tree").getAValueReachingSink().asExpr() = any(True t) and
134-
not this.getKeywordParameter("resolve_entities").getAValueReachingSink().asExpr() =
135-
any(False t)
136-
or
137132
kind.isDtdRetrieval() and
138133
this.getKeywordParameter("load_dtd").getAValueReachingSink().asExpr() = any(True t) and
139134
this.getKeywordParameter("no_network").getAValueReachingSink().asExpr() = any(False t)
@@ -305,9 +300,8 @@ module Lxml {
305300
// note that there is no `resolve_entities` argument, so it's not possible to turn off XXE :O
306301
kind.isXxe()
307302
or
308-
kind.isXmlBomb() and
309-
this.getKeywordParameter("huge_tree").getAValueReachingSink().asExpr() = any(True t)
310-
or
303+
// libxml2 has built-in protection against XML bombs via entity reference loop detection,
304+
// so lxml is not vulnerable to XML bomb attacks.
311305
kind.isDtdRetrieval() and
312306
this.getKeywordParameter("load_dtd").getAValueReachingSink().asExpr() = any(True t) and
313307
this.getKeywordParameter("no_network").getAValueReachingSink().asExpr() = any(False t)

python/ql/test/library-tests/frameworks/lxml/parsing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050

5151
# Billion laughs vuln (also XXE)
5252
parser = lxml.etree.XMLParser(huge_tree=True)
53-
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
53+
lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..)
5454

5555
# Safe for both Billion laughs and XXE
5656
parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True)
@@ -63,5 +63,5 @@
6363
# iterparse configurations ... this doesn't use a parser argument but takes MOST (!) of
6464
# the normal XMLParser arguments. Specifically, it doesn't allow disabling XXE :O
6565

66-
lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
66+
lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
6767
lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file
Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,4 @@
11
edges
2-
| test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:1:26:1:32 | ControlFlowNode for request | provenance | |
3-
| test.py:1:26:1:32 | ControlFlowNode for request | test.py:19:19:19:25 | ControlFlowNode for request | provenance | |
4-
| test.py:19:5:19:15 | ControlFlowNode for xml_content | test.py:30:34:30:44 | ControlFlowNode for xml_content | provenance | |
5-
| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:5:19:15 | ControlFlowNode for xml_content | provenance | AdditionalTaintStep |
62
nodes
7-
| test.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
8-
| test.py:1:26:1:32 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
9-
| test.py:19:5:19:15 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
10-
| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
11-
| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content |
123
subpaths
134
#select
14-
| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:30:34:30:44 | ControlFlowNode for xml_content | XML parsing depends on a $@ without guarding against uncontrolled entity expansion. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |

0 commit comments

Comments
 (0)