dusty/scanners/sast/gitleaks/scanner.py (150 lines of code) (raw):
#!/usr/bin/python3
# coding=utf-8
# pylint: disable=I0011,E0401,W0702,W0703
# Copyright 2019 getcarrier.io
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Scanner: gitleaks
"""
import os
import getpass
import subprocess
import shutil
import tempfile
import traceback
import pkg_resources
import dulwich
from dusty.tools import log
from dusty.commands import git_clone
from dusty.models.module import DependentModuleModel
from dusty.models.scanner import ScannerModel
from .parser import parse_findings
class Scanner(DependentModuleModel, ScannerModel):
    """ Scanner class: runs the gitleaks CLI against a local code directory """

    def __init__(self, context):
        """ Initialize scanner instance

        Scanner options are read from context.config["scanners"][group][name],
        where group and name are the third- and second-to-last components of
        this module's dotted name.
        """
        super().__init__()
        self.context = context
        module_path = __name__.split(".")
        self.config = self.context.config["scanners"][module_path[-3]][module_path[-2]]

    def execute(self):
        """ Run the scanner

        Optionally squashes the target history into a single commit, runs
        gitleaks, parses the report and saves intermediates. When commits were
        squashed the original .git is restored afterwards, including when the
        scan itself raises.
        """
        squash_commits = bool(self.config.get("squash_commits", None))
        # Squash commits (if needed)
        if squash_commits:
            self._squash_commits()
        try:
            # Make temporary files
            # NOTE(review): the temporary report file is not removed after the scan
            output_file_fd, output_file = tempfile.mkstemp(".json")
            log.debug("Output file: %s", output_file)
            os.close(output_file_fd)
            # Run task
            task = subprocess.run(
                [
                    "gitleaks", "--repo-path", self.config.get("code"), "--report", output_file
                ] + self._collect_additional_options(),
                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            log.log_subprocess_result(task)
            # Parse findings
            parse_findings(output_file, self)
            # Save intermediates
            self.save_intermediates(output_file, task)
        finally:
            # Revert commit squashing (if any); done in 'finally' so a failed
            # scan does not leave the temporary repository in place of the real one
            if squash_commits:
                self._revert_squash()

    def _squash_commits(self):
        """ Move the existing .git aside and commit the current code into a fresh repository """
        code_path = self.config.get("code")
        # Rename old .git (best-effort: failure is only logged)
        try:
            os.rename(
                os.path.join(code_path, ".git"),
                os.path.join(code_path, ".git.old")
            )
        except Exception:
            log.debug("Failed to rename old .git: %s", traceback.format_exc())
        # Initialize new repo
        current_dir = os.getcwd()
        try:
            os.chdir(code_path)
            # Patch dulwich to work without valid UID/GID
            dulwich.repo.__original__get_default_identity = dulwich.repo._get_default_identity  # pylint: disable=W0212
            dulwich.repo._get_default_identity = git_clone._dulwich_repo_get_default_identity  # pylint: disable=W0212
            # Set USERNAME if needed (getpass.getuser() failing means no user name is resolvable)
            try:
                getpass.getuser()
            except Exception:
                os.environ["USERNAME"] = "git"
            # Add current code, keeping the parked history out of the new commit
            repository = dulwich.porcelain.init(code_path)
            repository._put_named_file(os.path.join("info", "exclude"), b"/.git.old/")  # pylint: disable=W0212
            dulwich.porcelain.add(repository)
            log.debug("Git repository status: %s", dulwich.porcelain.status(repository, True))
            dulwich.porcelain.commit(
                repository,
                b"Current project code", b"Carrier <dusty@localhost>"
            )
        finally:
            # Always return to the directory the process started in
            os.chdir(current_dir)

    def _collect_additional_options(self):
        """ Build optional gitleaks command line arguments from the scanner config """
        additional_options = []
        if self.config.get("redact_offenders", None):
            additional_options.append("--redact")
        # Use custom rules
        if self.config.get("use_custom_rules", None):
            config_path = self.config.get("custom_rules_path", None)
            if not config_path:
                # Fall back to the rules file packaged next to this module
                config_path = pkg_resources.resource_filename(
                    "dusty",
                    f"{'/'.join(__name__.split('.')[1:-1])}/data/gitleaks.toml")
            additional_options.extend(["--config", config_path])
            log.debug("Custom config path: %s", config_path)
        return additional_options

    def _revert_squash(self):
        """ Drop the temporary repository and put the original .git back """
        code_path = self.config.get("code")
        shutil.rmtree(os.path.join(code_path, ".git"))
        # Restore is best-effort: there may have been no .git to park in the first place
        try:
            os.rename(
                os.path.join(code_path, ".git.old"),
                os.path.join(code_path, ".git")
            )
        except Exception:
            log.debug("Failed to revert .git: %s", traceback.format_exc())

    def save_intermediates(self, output_file, task):
        """ Save scanner intermediates

        Copies the report and writes the captured stdout/stderr of the task
        into <save_intermediates_to>/<scanner name>. Does nothing when
        save_intermediates_to is not set; failures are logged, not raised.
        """
        if not self.config.get("save_intermediates_to", None):
            return
        log.info("Saving intermediates")
        base = os.path.join(self.config.get("save_intermediates_to"), __name__.split(".")[-2])
        try:
            # Make directory for artifacts
            os.makedirs(base, mode=0o755, exist_ok=True)
            # Save report
            shutil.copyfile(output_file, os.path.join(base, "report.json"))
            # Save output (explicit encoding: the text was decoded as UTF-8)
            with open(os.path.join(base, "output.stdout"), "w", encoding="utf-8") as output:
                output.write(task.stdout.decode("utf-8", errors="ignore"))
            with open(os.path.join(base, "output.stderr"), "w", encoding="utf-8") as output:
                output.write(task.stderr.decode("utf-8", errors="ignore"))
        except Exception:
            log.exception("Failed to save intermediates")

    @staticmethod
    def fill_config(data_obj):
        """ Make sample config

        Appends every sample option to data_obj in order, each with its default
        value and an explanatory comment.
        """
        sample_options = (
            ("code", "/path/to/code", "scan target"),
            (
                "squash_commits", False,
                "(optional) Make one commit with current code only"
            ),
            (
                "show_offender_line", True,
                "(optional) Show lines with findings"
            ),
            (
                "commit_line_limit", 15,
                "(optional) Limit number of commit lines in one finding. Default: 15"
            ),
            (
                "redact_offenders", False,
                "(optional) Hide secrets in lines with findings"
            ),
            (
                "hide_commit_author", False,
                "(optional) Hide information about commits and authors"
            ),
            (
                "use_custom_rules", False,
                "(optional) Use custom detection rules"
            ),
            (
                "custom_rules_path", "/path/to/rules",
                "(optional) Path to custom rules"
            ),
            (
                "additional_text", "",
                "(optional) Additional text to add to description"
            ),
            (
                "save_intermediates_to", "/data/intermediates/sast",
                "(optional) Save scan intermediates (raw results, logs, ...)"
            ),
        )
        for key, value, comment in sample_options:
            data_obj.insert(len(data_obj), key, value, comment=comment)

    @staticmethod
    def validate_config(config):
        """ Validate config

        Raises ValueError when a required option is missing from config.
        """
        required = ["code"]
        not_set = [item for item in required if item not in config]
        if not_set:
            error = f"Required configuration options not set: {', '.join(not_set)}"
            log.error(error)
            raise ValueError(error)

    @staticmethod
    def get_name():
        """ Module name """
        return "gitleaks"

    @staticmethod
    def get_description():
        """ Module description or help message """
        return "gitleaks scanning"