from __future__ import annotations

import pytest

from pre_commit_hooks.detect_azure_credentials import main
from testing.util import get_resource_path


def _scan_resources(*names):
    """Run ``main`` over the named resource files and return its result.

    Each name is resolved with ``get_resource_path`` and the resolved paths
    are handed to ``main`` as a single tuple, in the order given.
    """
    return main(tuple(get_resource_path(name) for name in names))


@pytest.mark.parametrize(
    ("filename", "expected_retval"),
    [
        ("azure_credentials.txt", 1),
        ("azure_no_credentials.txt", 0),
        ("nonsense.txt", 0),
        ("ok_json.json", 0),
    ],
)
def test_detect_azure_credentials(filename, expected_retval):
    """Check the return value of ``main`` for each resource file."""
    assert _scan_resources(filename) == expected_retval


def test_detect_multiple_files():
    """Scan one file with credentials together with one clean file."""
    exit_code = _scan_resources(
        "azure_credentials.txt",
        "azure_no_credentials.txt",
    )
    # A single offending file is enough for a return value of 1.
    assert exit_code == 1


def test_detect_multiple_credentials_in_single_file():
    """Scan the credentials resource file on its own.

    Only the overall return value is asserted here.
    """
    exit_code = _scan_resources("azure_credentials.txt")
    assert exit_code == 1


def test_no_credentials_in_multiple_files():
    """Scan several clean files in one call and expect a return value of 0."""
    clean_files = (
        "azure_no_credentials.txt",
        "nonsense.txt",
        "ok_json.json",
    )
    assert _scan_resources(*clean_files) == 0


def test_datafactory_shir_key_detection():
    """Azure Data Factory SHIR keys: the credentials file must return 1.

    Only the overall return value is asserted here.
    """
    exit_code = _scan_resources("azure_credentials.txt")
    assert exit_code == 1


def test_storage_credential_86char_detection():
    """86 character storage credentials: the credentials file must return 1.

    Only the overall return value is asserted here.
    """
    exit_code = _scan_resources("azure_credentials.txt")
    assert exit_code == 1


def test_storage_credential_43char_detection():
    """43 character storage credentials: the credentials file must return 1.

    Only the overall return value is asserted here.
    """
    exit_code = _scan_resources("azure_credentials.txt")
    assert exit_code == 1


def test_blob_url_with_sas_detection():
    """Blob URLs with SAS tokens: the credentials file must return 1.

    Only the overall return value is asserted here.
    """
    exit_code = _scan_resources("azure_credentials.txt")
    assert exit_code == 1


def test_userid_password_detection():
    """Userid/password pairs: the credentials file must return 1.

    Only the overall return value is asserted here.
    """
    exit_code = _scan_resources("azure_credentials.txt")
    assert exit_code == 1


def test_machinekey_detection():
    """Machine keys: the credentials file must return 1.

    Only the overall return value is asserted here.
    """
    exit_code = _scan_resources("azure_credentials.txt")
    assert exit_code == 1


def test_connection_string_password_detection():
    """Connection string passwords: the credentials file must return 1.

    Only the overall return value is asserted here.
    """
    exit_code = _scan_resources("azure_credentials.txt")
    assert exit_code == 1


def test_network_credential_detection():
    """Network credentials with domains: the credentials file must return 1.

    Only the overall return value is asserted here.
    """
    exit_code = _scan_resources("azure_credentials.txt")
    assert exit_code == 1


def test_devops_pat_detection():
    """DevOps Personal Access Tokens: the credentials file must return 1.

    Only the overall return value is asserted here.
    """
    exit_code = _scan_resources("azure_credentials.txt")
    assert exit_code == 1


def test_app_service_deployment_detection():
    """App Service deployment secrets: the credentials file must return 1.

    Only the overall return value is asserted here.
    """
    exit_code = _scan_resources("azure_credentials.txt")
    assert exit_code == 1


def test_allows_arbitrarily_encoded_files(tmpdir):
    """Scan a file holding arbitrary non-text bytes and expect 0."""
    binary_path = tmpdir.join("binary_file")
    binary_path.write_binary(b"\x12\x9a\xe2\xf2\xff\xfe")
    exit_code = main((str(binary_path),))
    assert exit_code == 0


def test_obfuscation_in_output(capsys):
    """Check that captured stdout reports the finding without the raw secret."""
    exit_code = _scan_resources("azure_credentials.txt")
    assert exit_code == 1

    captured = capsys.readouterr().out

    # The report names both the offending file and the matching pattern.
    assert "azure_credentials.txt" in captured
    assert "datafactory-shir" in captured
    # A masking marker is present in the output ...
    assert "***" in captured
    # ... and the complete credential value is absent from it.
    assert "uUY/w9WdKTdAWWPDMrEEWdAEZIgeXlrO51GtVUR1/BE=" not in captured


def test_output_format_with_pattern_name(capsys):
    """Check that stdout names the file and contains parentheses."""
    exit_code = _scan_resources("azure_credentials.txt")
    assert exit_code == 1

    captured = capsys.readouterr().out

    # The offending file is mentioned.
    assert "azure_credentials.txt" in captured
    # Pattern names are expected in parentheses, so both must be present.
    assert "(" in captured
    assert ")" in captured


def test_empty_file(tmpdir):
    """Scan a zero-length file and expect a return value of 0."""
    empty_path = tmpdir.join("empty.txt")
    empty_path.write("")
    assert main((str(empty_path),)) == 0


def test_file_with_partial_patterns(tmpdir):
    """Scan incomplete credential-like fragments and expect no match."""
    content = (
        "# These are incomplete patterns that should NOT match\n"
        "IR@incomplete\n"
        "AccountKey=short\n"
        "password=\n"
        "sig=toolittledata\n"
    )
    partial_path = tmpdir.join("partial.txt")
    partial_path.write(content)
    assert main((str(partial_path),)) == 0