-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Python: Modernize 4 queries for missing/multiple calls to init/del methods #19932
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 20 commits
271f32e
a2fc14a
6f9983a
adcfdf1
caddec4
71d1179
085df26
2faf67d
1b4e2fe
16b90a1
b3056fc
73057d3
2e6f35b
c5b79fa
804b9ef
6ca4f32
2e5f470
d2c68de
f1026e4
c47e6e3
4b49ac3
d163bdf
e8a65b8
f5066c7
2c93e2c
7dad89f
d2a8e5d
b33a1c2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
/** Definitions for reasoning about multiple or missing calls to superclass methods. */ | ||
|
||
import python | ||
import semmle.python.ApiGraphs | ||
import semmle.python.dataflow.new.internal.DataFlowDispatch | ||
import codeql.util.Option | ||
|
||
/** | ||
 * Holds if `meth` is a method named `name` that transitively calls `calledMulti` of the same name | ||
 * via the two distinct calls `call1` and `call2`, where `call1` is positioned before `call2`. | ||
 * | ||
 * Both calls are located in the body of `meth`, so a same-file position comparison is sufficient | ||
 * to report each pair of calls exactly once. | ||
 */ | ||
predicate multipleCallsToSuperclassMethod( | ||
  Function meth, Function calledMulti, DataFlow::MethodCallNode call1, | ||
  DataFlow::MethodCallNode call2, string name | ||
) { | ||
  exists(Class cls | | ||
    meth.getName() = name and | ||
    meth.getScope() = cls and | ||
    // Order the pair by line/column instead of comparing `Location.toString()` values: | ||
    // string comparison is lexicographic and need not agree with source order | ||
    // (e.g. "10" sorts before "9"). | ||
    callLocationBefore(call1.getLocation(), call2.getLocation()) and | ||
    calledMulti = getASuperCallTargetFromCall(cls, meth, call1, name) and | ||
    calledMulti = getASuperCallTargetFromCall(cls, meth, call2, name) and | ||
    nonTrivial(calledMulti) | ||
  ) | ||
} | ||
|
||
/** | ||
 * Holds if `l1` is positioned before `l2`, assuming both locations are in the same file. | ||
 * Start position is compared first; end position is used only as a tie-breaker. | ||
 */ | ||
pragma[inline] | ||
private predicate callLocationBefore(Location l1, Location l2) { | ||
  l1.getStartLine() < l2.getStartLine() | ||
  or | ||
  l1.getStartLine() = l2.getStartLine() and | ||
  l1.getStartColumn() < l2.getStartColumn() | ||
  or | ||
  l1.getStartLine() = l2.getStartLine() and | ||
  l1.getStartColumn() = l2.getStartColumn() and | ||
  l1.getEndLine() < l2.getEndLine() | ||
  or | ||
  l1.getStartLine() = l2.getStartLine() and | ||
  l1.getStartColumn() = l2.getStartColumn() and | ||
  l1.getEndLine() = l2.getEndLine() and | ||
  l1.getEndColumn() < l2.getEndColumn() | ||
} | ||
|
||
/** | ||
 * Gets a method named `name` that is reached, directly or transitively, from the call `call` | ||
 * inside `meth`, with `mroBase` as the type determining the MRO to search. | ||
 */ | ||
Function getASuperCallTargetFromCall( | ||
  Class mroBase, Function meth, DataFlow::MethodCallNode call, string name | ||
) { | ||
  // Base case: the method that `call` resolves to directly. | ||
  result = getDirectSuperCallTargetFromCall(mroBase, meth, call, name) | ||
  or | ||
  // Recursive case: anything reached from any call inside that direct target. | ||
  exists(Function directTarget | | ||
    directTarget = getDirectSuperCallTargetFromCall(mroBase, meth, call, name) and | ||
    result = getASuperCallTargetFromCall(mroBase, directTarget, _, name) | ||
  ) | ||
} | ||
|
||
/** | ||
 * Gets the method named `name` that `meth` calls directly with `call`, with `mroBase` as the type | ||
 * determining the MRO to search. | ||
 * | ||
 * `meth` must itself be named `name` and contain `call`, and `mroBase` must be the class | ||
 * declaring `meth` or one of its transitive subclasses. | ||
 */ | ||
Function getDirectSuperCallTargetFromCall( | ||
  Class mroBase, Function meth, DataFlow::MethodCallNode call, string name | ||
) { | ||
  meth = call.getScope() and | ||
  meth.getName() = name and | ||
  call.calls(_, name) and | ||
  // `mroBase` is the class declaring `meth`, or a (transitive) subclass of it. | ||
  mroBase = getADirectSubclass*(meth.getScope()) and | ||
  exists(Class targetCls | | ||
    // Case 1: `super().<name>(...)`. | ||
    // The difference between 0-arg and 2-arg `super` is not considered; we assume each `super` | ||
    // uses the MRO of the instance `self`, i.e. the MRO of `mroBase`. | ||
    superCall(call, _) and | ||
    targetCls = getNextClassInMroKnownStartingClass(meth.getScope(), mroBase) and | ||
    result = findFunctionAccordingToMroKnownStartingClass(targetCls, mroBase, name) | ||
    or | ||
    // Case 2: `targetCls.<name>(self, ...)`. | ||
    // `targetCls` is the MRO base for this lookup. Note, however, that if the method we find | ||
    // uses `super()`, that `super()` still uses the MRO of the instance `self` | ||
    // (assuming it is 0-arg, or 2-arg with `self` as the second argument). | ||
    callsMethodOnClassWithSelf(meth, call, targetCls, _) and | ||
    result = findFunctionAccordingToMroKnownStartingClass(targetCls, targetCls, name) | ||
  ) | ||
} | ||
|
||
/** | ||
 * Gets a method that is called, directly or transitively, by a call to `cls.<name>`, | ||
 * with `mroBase` as the type determining the MRO to search. | ||
 */ | ||
Function getASuperCallTargetFromClass(Class mroBase, Class cls, string name) { | ||
  exists(Function entryPoint | | ||
    // The method that `cls.<name>` itself resolves to. | ||
    entryPoint = findFunctionAccordingToMroKnownStartingClass(cls, mroBase, name) | ||
  | | ||
    result = entryPoint | ||
    or | ||
    // Everything reached from any call inside that method. | ||
    result = getASuperCallTargetFromCall(mroBase, entryPoint, _, name) | ||
  ) | ||
} | ||
Comment on lines
+66
to
+75
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This MRO business is a bit awkward. I wonder if we could clean it up by creating a new IPA type representing "the MRO starting at a particular base class", and thus avoid having to thread This is just some musing on my part -- not necessarily an immediately actionable suggestion. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That could be a useful component of a public-facing dataflow dispatch / call graph resolution API |
||
|
||
/** | ||
 * Holds if `meth` does something besides calling a superclass method of the same name, | ||
 * and has at least one normal (non-exceptional) exit. | ||
 */ | ||
predicate nonTrivial(Function meth) { | ||
  // doesn't always raise an exception | ||
  exists(meth.getANormalExit()) and | ||
  exists(Stmt s | | ||
    s = meth.getAStmt() and | ||
    not s instanceof Pass and | ||
    // `s` is not merely an expression statement performing a superclass call | ||
    not exists(DataFlow::Node superclassCall | | ||
      superclassCall.asExpr() = s.(ExprStmt).getValue() and | ||
      ( | ||
        superCall(superclassCall, meth.getName()) | ||
        or | ||
        callsMethodOnClassWithSelf(meth, superclassCall, _, meth.getName()) | ||
      ) | ||
    ) | ||
  ) | ||
} | ||
|
||
/** | ||
 * Holds if `call` is a call to `super().<name>`. | ||
 * No distinction is made between 0- and 2-arg `super` calls. | ||
 */ | ||
predicate superCall(DataFlow::MethodCallNode call, string name) { | ||
  // The receiver of the method call must itself be a call to the builtin `super`. | ||
  call.calls(API::builtin("super").getACall(), name) | ||
} | ||
|
||
/** Holds if the body of `meth` contains a call to `super().<name>`, where `<name>` is the name of `meth` itself. */ | ||
predicate callsSuper(Function meth) { | ||
  exists(DataFlow::MethodCallNode superclassCall | | ||
    superCall(superclassCall, meth.getName()) and | ||
    superclassCall.getScope() = meth | ||
  ) | ||
} | ||
|
||
/** | ||
 * Holds if `meth` calls `target.<name>(self, ...)` with the call `call`, | ||
 * where `self` is the first parameter of `meth`. | ||
 */ | ||
predicate callsMethodOnClassWithSelf( | ||
  Function meth, DataFlow::MethodCallNode call, Class target, string name | ||
) { | ||
  exists(DataFlow::ParameterNode selfParam | | ||
    // `selfParam` is the first parameter of `meth` and flows locally to the call's first argument | ||
    selfParam.getParameter() = meth.getArg(0) and | ||
    selfParam.(DataFlow::LocalSourceNode).flowsTo(call.getArg(0)) and | ||
    // the receiver of the call is a reference to the class `target` | ||
    call.calls(classTracker(target), name) | ||
  ) | ||
} | ||
|
||
/** | ||
 * Holds if `meth` calls a method named `name`, passing its own first parameter (`self`) as the | ||
 * first argument, but the receiver of the call does not resolve to any known class. | ||
 */ | ||
predicate callsMethodOnUnknownClassWithSelf(Function meth, string name) { | ||
  exists(DataFlow::MethodCallNode call, DataFlow::Node receiver, DataFlow::ParameterNode selfParam | | ||
    selfParam.getParameter() = meth.getArg(0) and | ||
    selfParam.(DataFlow::LocalSourceNode).flowsTo(call.getArg(0)) and | ||
    call.calls(receiver, name) and | ||
    // no class is tracked to the receiver | ||
    not receiver = classTracker(_) | ||
  ) | ||
} | ||
|
||
/** | ||
 * Holds if a call to `base.<name>` does not reach the non-trivial method `shouldCall`, also named | ||
 * `name` and declared in a (transitive) superclass of `base`, when it appears it should. | ||
 */ | ||
predicate missingCallToSuperclassMethod(Class base, Function shouldCall, string name) { | ||
  shouldCall.getScope() = getADirectSuperclass+(base) and | ||
  shouldCall.getName() = name and | ||
  nonTrivial(shouldCall) and | ||
  not shouldCall = getASuperCallTargetFromClass(base, base, name) and | ||
  // "Benefit of the doubt" - if somewhere in the chain we call an unknown superclass, | ||
  // assume all the necessary parent methods are called from it | ||
  not exists(Function reached | | ||
    reached = getASuperCallTargetFromClass(base, base, name) and | ||
    callsMethodOnUnknownClassWithSelf(reached, name) | ||
  ) | ||
} | ||
|
||
/** | ||
 * Holds if `base` does not call a superclass method `shouldCall` named `name` when it appears it should. | ||
 * Results are restricted to hold only for the highest `base` class and the lowest `shouldCall` method | ||
 * in the hierarchy for which this applies. | ||
 */ | ||
predicate missingCallToSuperclassMethodRestricted(Class base, Function shouldCall, string name) { | ||
  missingCallToSuperclassMethod(base, shouldCall, name) and | ||
  // Alert only on the highest base class that has the issue | ||
  not exists(Class higherBase | | ||
    missingCallToSuperclassMethod(higherBase, shouldCall, name) and | ||
    higherBase = getADirectSuperclass+(base) | ||
  ) and | ||
  // Mention in the alert only the lowest method we're missing the call to | ||
  not exists(Function lowerShouldCall | | ||
    missingCallToSuperclassMethod(base, lowerShouldCall, name) and | ||
    lowerShouldCall.getScope() = getADirectSubclass+(shouldCall.getScope()) | ||
  ) | ||
} | ||
|
||
/** | ||
 * If the method named `name` declared in `base` contains a `super()` call, gets a method named | ||
 * `name` in the inheritance hierarchy of `base` that is called during a call to `base.<name>` | ||
 * but does not itself contain a `super()` call, and that would call `shouldCall` if it did. | ||
 * `shouldCall` is a method that does not otherwise get called during a call to `base.<name>`. | ||
 */ | ||
Function getPossibleMissingSuper(Class base, Function shouldCall, string name) { | ||
  missingCallToSuperclassMethod(base, shouldCall, name) and | ||
  exists(Function baseMethod, Class nextCls | | ||
    baseMethod.getName() = name and | ||
    baseMethod.getScope() = base and | ||
    // the base method calls super, so is presumably expecting every method called in the MRO chain to do so | ||
    callsSuper(baseMethod) and | ||
    // result is something that does get called in the chain | ||
    result = getASuperCallTargetFromClass(base, base, name) and | ||
    // it doesn't call super | ||
    not callsSuper(result) and | ||
    // if it did call super, it would resolve to the missing method | ||
    nextCls = getNextClassInMroKnownStartingClass(result.getScope(), base) and | ||
    shouldCall = findFunctionAccordingToMroKnownStartingClass(nextCls, base, name) | ||
  ) | ||
} | ||
|
||
private module FunctionOption = Option<Function>; | ||
|
||
/** An optional `Function`: either some function, or none. */ | ||
class FunctionOption extends FunctionOption::Option { | ||
  /** | ||
   * Holds if this element is at the specified location. | ||
   * The location spans column `startcolumn` of line `startline` to | ||
   * column `endcolumn` of line `endline` in file `filepath`. | ||
   * When there is no function, the empty location (`""`, 0, 0, 0, 0) is used. | ||
   * For more information, see | ||
   * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). | ||
   */ | ||
  predicate hasLocationInfo( | ||
    string filepath, int startline, int startcolumn, int endline, int endcolumn | ||
  ) { | ||
    // No function present: report the empty location. | ||
    this.isNone() and | ||
    filepath = "" and | ||
    startline = 0 and | ||
    startcolumn = 0 and | ||
    endline = 0 and | ||
    endcolumn = 0 | ||
    or | ||
    // Otherwise, delegate to the location of the wrapped function. | ||
    this.asSome() | ||
        .getLocation() | ||
        .hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) | ||
  } | ||
|
||
  /** Gets the qualified name of the wrapped function, or the empty string if there is none. */ | ||
  string getQualifiedName() { | ||
    this.isNone() and | ||
    result = "" | ||
    or | ||
    result = this.asSome().getQualifiedName() | ||
  } | ||
} | ||
|
||
/** | ||
 * Gets the result of `getPossibleMissingSuper` wrapped as an option, | ||
 * or the `None` option if no such result exists. | ||
 */ | ||
bindingset[name] | ||
FunctionOption getPossibleMissingSuperOption(Class base, Function shouldCall, string name) { | ||
  if exists(getPossibleMissingSuper(base, shouldCall, name)) | ||
  then result.asSome() = getPossibleMissingSuper(base, shouldCall, name) | ||
  else result.isNone() | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
<!DOCTYPE qhelp PUBLIC | ||
"-//Semmle//qhelp//EN" | ||
"qhelp.dtd"> | ||
<qhelp> | ||
|
||
<overview> | ||
<p> | ||
Python, unlike some other object-oriented languages such as Java, allows the developer complete freedom in | ||
when and how superclass finalizers are called during object finalization. | ||
However, the developer has responsibility for ensuring that objects are properly cleaned up, and that all superclass <code>__del__</code> | ||
methods are called. | ||
</p> | ||
<p> | ||
Classes with a <code>__del__</code> method (a finalizer) typically hold some resource such as a file handle that needs to be cleaned up. | ||
If the <code>__del__</code> method of a superclass is not called during object finalization, it is likely that | ||
resources may be leaked. | ||
</p> | ||
|
||
<p>A call to the <code>__del__</code> method of a superclass during object finalization may be unintentionally skipped: | ||
</p> | ||
<ul> | ||
<li>If a subclass calls the <code>__del__</code> method of the wrong class.</li> | ||
<li>If a call to the <code>__del__</code> method of one of its base classes is omitted.</li> | ||
<li>If a call to <code>super().__del__</code> is used, but not all <code>__del__</code> methods in the Method Resolution Order (MRO) | ||
chain themselves call <code>super()</code>. This in particular arises more often in cases of multiple inheritance. </li> | ||
</ul> | ||
|
||
|
||
</overview> | ||
<recommendation> | ||
<p>Ensure that all superclass <code>__del__</code> methods are properly called. | ||
Either each base class's finalize method should be explicitly called, or <code>super()</code> calls | ||
should be consistently used throughout the inheritance hierarchy.</p> | ||
|
||
|
||
</recommendation> | ||
<example> | ||
<p>In the following example, explicit calls to <code>__del__</code> are used, but <code>SportsCar</code> erroneously calls | ||
<code>Vehicle.__del__</code>. This is fixed in <code>FixedSportsCar</code> by calling <code>Car.__del__</code>. | ||
</p> | ||
|
||
<sample src="examples/MissingCallToDel.py" /> | ||
|
||
</example> | ||
<references> | ||
|
||
<li>Python Reference: <a href="https://docs.python.org/3/reference/datamodel.html#object.__del__">__del__</a>.</li> | ||
<li>Python Standard Library: <a href="https://docs.python.org/3/library/functions.html#super">super</a>.</li> | ||
<li>Python Glossary: <a href="https://docs.python.org/3/glossary.html#term-method-resolution-order">Method resolution order</a>.</li> | ||
|
||
</references> | ||
</qhelp> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
/** | ||
* @name Missing call to superclass `__del__` during object destruction | ||
* @description An omitted call to a superclass `__del__` method may lead to class instances not being cleaned up properly. | ||
* @kind problem | ||
* @tags quality | ||
* reliability | ||
* correctness | ||
* performance | ||
* @problem.severity error | ||
* @sub-severity low | ||
* @precision high | ||
* @id py/missing-call-to-delete | ||
*/ | ||
|
||
import python | ||
import MethodCallOrder | ||
|
||
/** Gets a method of class `c` that is named `__del__`. */ | ||
Function getDelMethod(Class c) { | ||
  exists(Function finalizer | | ||
    finalizer = c.getAMethod() and | ||
    finalizer.getName() = "__del__" | ||
  | | ||
    result = finalizer | ||
  ) | ||
} | ||
|
||
// Reports classes `base` for which a superclass `__del__` method `shouldCall` is not reached, | ||
// together with the element (`possibleIssue`) that most likely needs to be fixed. | ||
// `possibleIssue` is an optional function so that every alert selects the same columns, | ||
// even when there is no function to point at. | ||
from Class base, Function shouldCall, FunctionOption possibleIssue, string msg | ||
where | ||
// NOTE(review): classes that define `__new__` are excluded; this guard looks like it may have been | ||
// carried over from the `__init__` variant of this query - confirm it is intended for `__del__`. | ||
not exists(Function newMethod | newMethod = base.getAMethod() and newMethod.getName() = "__new__") and | ||
exists(FunctionOption possiblyMissingSuper | | ||
missingCallToSuperclassMethodRestricted(base, shouldCall, "__del__") and | ||
possiblyMissingSuper = getPossibleMissingSuperOption(base, shouldCall, "__del__") and | ||
( | ||
// Case 1: some method in the `super()` chain does not itself call `super().__del__`. | ||
not possiblyMissingSuper.isNone() and | ||
possibleIssue = possiblyMissingSuper and | ||
msg = | ||
"This class does not call $@ during finalization. ($@ may be missing a call to super().__del__)" | ||
or | ||
possiblyMissingSuper.isNone() and | ||
( | ||
// Case 2: `base` has its own `__del__`, which is then the element to point at. | ||
possibleIssue.asSome() = getDelMethod(base) and | ||
msg = | ||
"This class does not call $@ during finalization. ($@ may be missing a call to a base class __del__)" | ||
or | ||
// Case 3: `base` has no `__del__` at all, so there is no function to point at. | ||
not exists(getDelMethod(base)) and | ||
possibleIssue.isNone() and | ||
msg = | ||
"This class does not call $@ during finalization. (The class lacks an __del__ method to ensure every base class __del__ is called.)" | ||
) | ||
) | ||
) | ||
select base, msg, shouldCall, shouldCall.getQualifiedName(), possibleIssue, | ||
possibleIssue.getQualifiedName() |
Uh oh!
There was an error while loading. Please reload this page.