Skip to content

Commit a9d736d

Browse files
committed
feat: dependency on empty project link, and context manager for sourcecode download
1 parent 21ec559 commit a9d736d

File tree

3 files changed

+55
-17
lines changed

3 files changed

+55
-17
lines changed

src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,14 @@
2323
from macaron.config.global_config import global_config
2424
from macaron.errors import ConfigurationError, HeuristicAnalyzerValueError
2525
from macaron.json_tools import JsonType, json_extract
26-
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult
26+
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
27+
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
2728
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset
2829

2930
logger: logging.Logger = logging.getLogger(__name__)
3031

3132

32-
class PyPISourcecodeAnalyzer:
33+
class PyPISourcecodeAnalyzer(BaseHeuristicAnalyzer):
3334
"""This class is used to analyze the source code of python PyPI packages. This analyzer is a work in progress.
3435
3536
This analyzer works in two phases. In the first phase, it will perform a pattern-based scan of all python files
@@ -48,8 +49,14 @@ class PyPISourcecodeAnalyzer:
4849
of the package.
4950
"""
5051

51-
def __init__(self, resources_path: str = global_config.resources_path) -> None:
52-
"""Collect required data for analysing the source code."""
52+
def __init__(self, resources_path: str | None = None) -> None:
53+
super().__init__(
54+
name="anomalous_version_analyzer",
55+
heuristic=Heuristics.SUSPICIOUS_PATTERNS,
56+
depends_on=[(Heuristics.EMPTY_PROJECT_LINK, HeuristicResult.FAIL)],
57+
)
58+
if resources_path is None:
59+
resources_path = global_config.resources_path
5360
self.default_rule_path, self.custom_rule_path = self._load_defaults(resources_path)
5461

5562
def _load_defaults(self, resources_path: str) -> tuple[str, str | None]:

src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py

Lines changed: 34 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111

1212
from macaron.database.db_custom_types import DBJsonDict
1313
from macaron.database.table_definitions import CheckFacts
14-
from macaron.errors import ConfigurationError, HeuristicAnalyzerValueError
14+
from macaron.errors import ConfigurationError, HeuristicAnalyzerValueError, SourceCodeError
1515
from macaron.json_tools import JsonType, json_extract
1616
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
1717
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
@@ -282,27 +282,45 @@ def _should_skip(
282282
return True
283283
return False
284284

285-
def analyze_source(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]:
285+
def analyze_source(
286+
self, pypi_package_json: PyPIPackageJsonAsset, results: dict[Heuristics, HeuristicResult]
287+
) -> tuple[HeuristicResult, dict[str, JsonType]]:
286288
"""Analyze the source code of the package with a textual scan, looking for malicious code patterns.
287289
288290
Parameters
289291
----------
290292
pypi_package_json: PyPIPackageJsonAsset
291293
The PyPI package JSON asset object.
294+
results: dict[Heuristics, HeuristicResult]
295+
Containing all heuristics' results (excluding this one), where the key is the heuristic and the value is the result
296+
associated with that heuristic.
292297
293298
Returns
294299
-------
295300
tuple[HeuristicResult, dict[str, JsonType]]
296301
Containing the analysis results and relevant patterns identified.
302+
303+
Raises
304+
------
305+
HeuristicAnalyzerValueError
306+
If the analyzer fails due to malformed package information.
307+
ConfigurationError
308+
If the configuration of the analyzer encountered a problem.
297309
"""
298310
logger.debug("Instantiating %s", PyPISourcecodeAnalyzer.__name__)
299-
try:
300-
sourcecode_analyzer = PyPISourcecodeAnalyzer()
301-
return sourcecode_analyzer.analyze(pypi_package_json)
302-
except (ConfigurationError, HeuristicAnalyzerValueError) as source_code_error:
303-
logger.debug("Unable to perform source code analysis: %s", source_code_error)
311+
analyzer = PyPISourcecodeAnalyzer()
312+
313+
if analyzer.depends_on and self._should_skip(results, analyzer.depends_on):
304314
return HeuristicResult.SKIP, {}
305315

316+
try:
317+
with pypi_package_json.sourcecode():
318+
return analyzer.analyze(pypi_package_json)
319+
except SourceCodeError as error:
320+
error_msg = f"Unable to perform analysis, source code not available: {error}"
321+
logger.debug(error_msg)
322+
raise HeuristicAnalyzerValueError(error_msg) from error
323+
306324
def run_heuristics(
307325
self, pypi_package_json: PyPIPackageJsonAsset
308326
) -> tuple[dict[Heuristics, HeuristicResult], dict[str, JsonType]]:
@@ -428,9 +446,15 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
428446
confidence = Confidence.HIGH
429447
result_type = CheckResultType.PASSED
430448

431-
# experimental analyze sourcecode feature
432-
if ctx.dynamic_data["analyze_source"] and pypi_package_json.download_sourcecode():
433-
sourcecode_result, sourcecode_detail_info = self.analyze_source(pypi_package_json)
449+
# experimental sourcecode analysis feature
450+
if ctx.dynamic_data["analyze_source"]:
451+
try:
452+
sourcecode_result, sourcecode_detail_info = self.analyze_source(
453+
pypi_package_json, heuristic_results
454+
)
455+
except (HeuristicAnalyzerValueError, ConfigurationError):
456+
return CheckResultData(result_tables=[], result_type=CheckResultType.UNKNOWN)
457+
434458
heuristic_results[Heuristics.SUSPICIOUS_PATTERNS] = sourcecode_result
435459
heuristics_detail_info.update(sourcecode_detail_info)
436460

@@ -440,8 +464,6 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
440464
confidence = Confidence.LOW
441465
result_type = CheckResultType.FAILED
442466

443-
pypi_package_json.cleanup_sourcecode()
444-
445467
result_tables.append(
446468
MaliciousMetadataFacts(
447469
result=heuristic_results,

src/macaron/slsa_analyzer/package_registry/pypi_registry.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,8 @@
1010
import tarfile
1111
import tempfile
1212
import urllib.parse
13-
from collections.abc import Callable, Iterator
13+
from collections.abc import Callable, Generator, Iterator
14+
from contextlib import contextmanager
1415
from dataclasses import dataclass
1516
from datetime import datetime
1617

@@ -538,6 +539,14 @@ def get_latest_release_upload_time(self) -> str | None:
538539
return upload_time
539540
return None
540541

542+
@contextmanager
543+
def sourcecode(self) -> Generator[None]:
544+
"""Download and cleanup source code of the package with a context manager."""
545+
if not self.download_sourcecode():
546+
raise SourceCodeError("Unable to download package source code.")
547+
yield
548+
self.cleanup_sourcecode()
549+
541550
def download_sourcecode(self) -> bool:
542551
"""Get the source code of the package and store it in a temporary directory.
543552

0 commit comments

Comments
 (0)