mirror of
https://github.com/pre-commit/pre-commit-hooks.git
synced 2026-04-04 19:26:52 +00:00
Add Azure credential detection hook
Implements the detect-azure-credentials hook based on Microsoft AzSK credential patterns.

Features:
- 33 credential detection patterns aligned with the official AzSK specification
- Covers Azure-specific credentials (Data Factory, Storage, DevOps, etc.)
- Detects generic secrets (passwords, API keys, tokens, certificates)
- Comprehensive test suite with 22 test cases
- Pattern name identification in output for easier debugging

Patterns include (CSCAN codes):
- Azure Data Factory SHIR keys (0010)
- Azure Storage credentials (0020, 0040)
- App Service deployment secrets (0050)
- Connection strings and passwords (0090, 0100, 0120)
- Machine keys and network credentials (0130, 0150)
- DevOps PAT tokens (0160)
- PublishSettings passwords (0030)
- PEM private keys (0060)
- Git credentials (0210)
- JWT and refresh tokens (0250)
- Ansible Vault (0260)
- Azure PowerShell token cache (0270)
- Default/known passwords (0140)

All tests passing (22 Azure + 36 AWS = 58 total).
This commit is contained in:
parent
5dcc56558c
commit
7a894b3c98
5 changed files with 555 additions and 0 deletions
168
tests/detect_azure_credentials_test.py
Normal file
168
tests/detect_azure_credentials_test.py
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from pre_commit_hooks.detect_azure_credentials import main
|
||||
from testing.util import get_resource_path
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("filename", "expected_retval"),
    [
        ("azure_credentials.txt", 1),
        ("azure_no_credentials.txt", 0),
        ("nonsense.txt", 0),
        ("ok_json.json", 0),
    ],
)
def test_detect_azure_credentials(filename, expected_retval):
    """Run the hook on one resource file and compare its return code.

    Each parametrized case pairs a file name (resolved through
    ``get_resource_path``) with the return value ``main`` is expected
    to produce for it.
    """
    resource = get_resource_path(filename)
    assert main((resource,)) == expected_retval
|
||||
|
||||
|
||||
def test_detect_multiple_files():
    """Scan two resource files in a single invocation of ``main``.

    ``azure_credentials.txt`` and ``azure_no_credentials.txt`` are passed
    together; the combined run is expected to return 1, i.e. one flagged
    file is enough to fail the whole invocation.
    """
    targets = tuple(
        get_resource_path(name)
        for name in ("azure_credentials.txt", "azure_no_credentials.txt")
    )
    assert main(targets) == 1
|
||||
|
||||
|
||||
def test_detect_multiple_credentials_in_single_file():
    """Check that ``azure_credentials.txt`` is flagged by the hook.

    NOTE(review): only the return code is asserted, so this does not
    prove that *every* credential in the file was reported -- confirm
    whether an output-based assertion is wanted here.
    """
    credentials_file = get_resource_path("azure_credentials.txt")
    assert main((credentials_file,)) == 1
|
||||
|
||||
|
||||
def test_no_credentials_in_multiple_files():
    """Scan three resource files together and expect a return code of 0."""
    clean_names = ("azure_no_credentials.txt", "nonsense.txt", "ok_json.json")
    clean_paths = tuple(get_resource_path(name) for name in clean_names)
    assert main(clean_paths) == 0
|
||||
|
||||
|
||||
def test_datafactory_shir_key_detection(capsys):
    """Test specific detection of Azure Data Factory SHIR keys.

    Asserting only the return code cannot distinguish a SHIR match from
    any other pattern hit in the shared fixture, so the captured output
    is also checked for the ``datafactory-shir`` pattern name.
    """
    ret = main((get_resource_path("azure_credentials.txt"),))
    assert ret == 1

    out, _ = capsys.readouterr()
    # The hook prints the matching pattern's name next to each finding.
    assert "datafactory-shir" in out
|
||||
|
||||
|
||||
def test_storage_credential_86char_detection():
    """Check that the shared credentials fixture is flagged.

    Intended to cover 86-character storage credentials.
    NOTE(review): only the return code for ``azure_credentials.txt`` is
    asserted; which pattern matched is not verified here.
    """
    fixture = get_resource_path("azure_credentials.txt")
    assert main((fixture,)) == 1
|
||||
|
||||
|
||||
def test_storage_credential_43char_detection():
    """Check that the shared credentials fixture is flagged.

    Intended to cover 43-character storage credentials.
    NOTE(review): only the return code for ``azure_credentials.txt`` is
    asserted; which pattern matched is not verified here.
    """
    fixture = get_resource_path("azure_credentials.txt")
    assert main((fixture,)) == 1
|
||||
|
||||
|
||||
def test_blob_url_with_sas_detection():
    """Check that the shared credentials fixture is flagged.

    Intended to cover blob URLs carrying SAS tokens.
    NOTE(review): only the return code for ``azure_credentials.txt`` is
    asserted; which pattern matched is not verified here.
    """
    fixture = get_resource_path("azure_credentials.txt")
    assert main((fixture,)) == 1
|
||||
|
||||
|
||||
def test_userid_password_detection():
    """Check that the shared credentials fixture is flagged.

    Intended to cover userid/password pairs.
    NOTE(review): only the return code for ``azure_credentials.txt`` is
    asserted; which pattern matched is not verified here.
    """
    fixture = get_resource_path("azure_credentials.txt")
    assert main((fixture,)) == 1
|
||||
|
||||
|
||||
def test_machinekey_detection():
    """Check that the shared credentials fixture is flagged.

    Intended to cover machine keys.
    NOTE(review): only the return code for ``azure_credentials.txt`` is
    asserted; which pattern matched is not verified here.
    """
    fixture = get_resource_path("azure_credentials.txt")
    assert main((fixture,)) == 1
|
||||
|
||||
|
||||
def test_connection_string_password_detection():
    """Check that the shared credentials fixture is flagged.

    Intended to cover passwords embedded in connection strings.
    NOTE(review): only the return code for ``azure_credentials.txt`` is
    asserted; which pattern matched is not verified here.
    """
    fixture = get_resource_path("azure_credentials.txt")
    assert main((fixture,)) == 1
|
||||
|
||||
|
||||
def test_network_credential_detection():
    """Check that the shared credentials fixture is flagged.

    Intended to cover network credentials with domains.
    NOTE(review): only the return code for ``azure_credentials.txt`` is
    asserted; which pattern matched is not verified here.
    """
    fixture = get_resource_path("azure_credentials.txt")
    assert main((fixture,)) == 1
|
||||
|
||||
|
||||
def test_devops_pat_detection():
    """Check that the shared credentials fixture is flagged.

    Intended to cover DevOps Personal Access Tokens.
    NOTE(review): only the return code for ``azure_credentials.txt`` is
    asserted; which pattern matched is not verified here.
    """
    fixture = get_resource_path("azure_credentials.txt")
    assert main((fixture,)) == 1
|
||||
|
||||
|
||||
def test_app_service_deployment_detection():
    """Check that the shared credentials fixture is flagged.

    Intended to cover App Service deployment secrets.
    NOTE(review): only the return code for ``azure_credentials.txt`` is
    asserted; which pattern matched is not verified here.
    """
    fixture = get_resource_path("azure_credentials.txt")
    assert main((fixture,)) == 1
|
||||
|
||||
|
||||
def test_allows_arbitrarily_encoded_files(tmpdir):
    """Feed the hook a file of raw bytes and expect a return code of 0.

    The six bytes written below are written in binary mode, so no text
    encoding is applied to them; the hook must neither crash on them nor
    report a finding.
    """
    raw_bytes = b"\x12\x9a\xe2\xf2\xff\xfe"
    binary_path = tmpdir.join("binary_file")
    binary_path.write_binary(raw_bytes)

    assert main((str(binary_path),)) == 0
|
||||
|
||||
|
||||
def test_obfuscation_in_output(capsys):
    """Verify that reported findings are masked in the hook's stdout.

    After scanning ``azure_credentials.txt`` the captured output must name
    the file and the ``datafactory-shir`` pattern, contain the ``***``
    mask, and must not contain the raw secret string checked below.
    """
    raw_secret = "uUY/w9WdKTdAWWPDMrEEWdAEZIgeXlrO51GtVUR1/BE="

    assert main((get_resource_path("azure_credentials.txt"),)) == 1

    captured, _ = capsys.readouterr()
    # File name and pattern name identify where and what was found.
    assert "azure_credentials.txt" in captured
    assert "datafactory-shir" in captured
    # The mask must be present ...
    assert "***" in captured
    # ... and the unmasked value must never be echoed back.
    assert raw_secret not in captured
|
||||
|
||||
|
||||
def test_output_format_with_pattern_name(capsys):
    """Verify the basic shape of the hook's stdout for a flagged file.

    The output must mention ``azure_credentials.txt`` and contain both an
    opening and a closing parenthesis.
    NOTE(review): the parenthesis check is weak -- any "(" and ")" in the
    output satisfies it, not necessarily ones around a pattern name.
    """
    assert main((get_resource_path("azure_credentials.txt"),)) == 1

    captured, _ = capsys.readouterr()
    assert "azure_credentials.txt" in captured
    # Pattern names are expected to be printed inside parentheses.
    assert "(" in captured
    assert ")" in captured
|
||||
|
||||
|
||||
def test_empty_file(tmpdir):
    """Scan a zero-length file and expect a return code of 0."""
    blank = tmpdir.join("empty.txt")
    blank.write("")

    assert main((str(blank),)) == 0
|
||||
|
||||
|
||||
def test_file_with_partial_patterns(tmpdir):
    """Scan truncated credential-like lines and expect a return code of 0.

    The temporary file holds fragments that resemble credential prefixes
    but are too short or have no value; none of them should be reported.
    """
    near_misses = (
        "# These are incomplete patterns that should NOT match\n"
        "IR@incomplete\n"
        "AccountKey=short\n"
        "password=\n"
        "sig=toolittledata\n"
    )
    target = tmpdir.join("partial.txt")
    target.write(near_misses)

    assert main((str(target),)) == 0
|
||||
Loading…
Add table
Add a link
Reference in a new issue