Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
c500227
dlp inference notebook & assets
tzemicheal Jun 5, 2025
37f9f58
pipeline files updated, notebook cleaned
tzemicheal Jun 12, 2025
1bfb64e
config file added
tzemicheal Jun 12, 2025
354303f
fix linter, cleanup codes
tzemicheal Jun 12, 2025
bcf4546
style fix
tzemicheal Jun 16, 2025
b8c0259
sytle, comments and pr feedback fixed
tzemicheal Jun 16, 2025
d4bb3a8
Merge branch 'branch-25.06' into tz-dlp-models
tzemicheal Jun 16, 2025
a8f9515
notebook review from hc addressed
tzemicheal Jun 16, 2025
5191f3f
Merge branch 'tz-dlp-models' of github.com:tzemicheal/Morpheus into t…
tzemicheal Jun 16, 2025
ac8c243
style fixed
tzemicheal Jun 16, 2025
e4b8113
style fixed
tzemicheal Jun 16, 2025
9e10ac7
style fixed
tzemicheal Jun 16, 2025
d0168bc
Merge branch 'tz-dlp-models' of github.com:tzemicheal/Morpheus into t…
tzemicheal Jun 16, 2025
750451e
Merge branch 'tz-dlp-models' of github.com:tzemicheal/Morpheus into t…
tzemicheal Jun 16, 2025
e2393ed
style error fix
tzemicheal Jun 16, 2025
34948a4
Merge branch 'tz-dlp-models' of github.com:tzemicheal/Morpheus into t…
tzemicheal Jun 16, 2025
adde44b
Merge branch 'tz-dlp-models' of github.com:tzemicheal/Morpheus into t…
tzemicheal Jun 16, 2025
7c8169f
fix style error
tzemicheal Jun 16, 2025
7861ea2
Merge branch 'tz-dlp-models' of github.com:tzemicheal/Morpheus into t…
tzemicheal Jun 16, 2025
ad9d2d6
Update regex rules
tzemicheal Jul 3, 2025
8cf3feb
Update requirements.txt
tzemicheal Jul 3, 2025
e6cb2a1
fix viz code
tzemicheal Jul 8, 2025
feac8c7
fix all typo and HC PR feedbacks
tzemicheal Jul 9, 2025
f469b8a
Merge branch 'branch-25.06' of github.com:nv-morpheus/Morpheus into t…
tzemicheal Jul 9, 2025
2fd029b
fix style
tzemicheal Jul 9, 2025
56d839a
Add descriiption and cleanup notebook debug codes
tzemicheal Jul 10, 2025
e43e968
revert change to code-workspace
tzemicheal Jul 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions models/validation-inference-scripts/dlp-models/data/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
{
"entity_labels": [
"medical_record_number",
"date_of_birth",
"ssn",
"date",
"first_name",
"email",
"last_name",
"customer_id",
"employee_id",
"name",
"street_address",
"phone_number",
"ipv4",
"credit_card_number",
"license_plate",
"address",
"user_name",
"device_identifier",
"bank_routing_number",
"date_time",
"company_name",
"unique_identifier",
"biometric_identifier",
"account_number",
"city",
"certificate_license_number",
"time",
"postcode",
"vehicle_identifier",
"coordinate",
"country",
"api_key",
"ipv6",
"password",
"health_plan_beneficiary_number",
"national_id",
"tax_id",
"url",
"state",
"swift_bic",
"cvv",
"pin"
],
"type_weights": {
"password": 85,
"credit_card": 90,
"ssn": 95,
"address": 60,
"email": 40,
"phone_us": 45,
"phone_numbers": 45,
"ip_address": 30,
"date": 20,
"api_key": 80,
"customer_id": 65,
"personal": 70,
"financial": 85,
"health": 75,
"api_credentials": 75
}
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,307 @@
{
"medical_record_number": [
"\\b(?:MRN|MR)[-_#:]\\s*['\"]?([A-Za-z0-9-_]{5,20})['\"]?\\b",
"\\bmedical[-_]?record[-_]?number[:=]\\s*['\"]?([A-Za-z0-9-_]{5,20})['\"]?\\b",
"\\b\\d{5,12}\\b"
],
"date_of_birth": [
"\\b(?:dob|date[_\\s]?of[_\\s]?birth|birth[_\\s]?date)[:=]?\\s*\\d{1,2}[\\/\\-\\.]\\d{1,2}[\\/\\-\\.]\\d{2,4}\\b",
"\\b(?:dob|birth[_\\s]?date)[:=]?\\s*\\d{4}[\\/\\-\\.]\\d{2}[\\/\\-\\.]\\d{2}\\b",
"\\b(?:dob|birth[_\\s]?date)[:=]?\\s*\\w{3,9}\\s\\d{1,2},\\s\\d{4}\\b",
"\\b(?:DOB)\\s*[:=]?\\s*\\d{2}\\s*\\w{3,9}\\s*\\d{4}\\b",
"\\b(?:birthdate|bday)[:=]?\\s*\\d{2}-\\w{3}-\\d{4}\\b"
],
"ssn": [
"\\b\\d{3}[\\s|-]\\d{2}[\\s|-]\\d{4}\\b",
"\\b\\d{9}\\b"
],
"date": [
"\\b\\d{1,2}[\\/\\-\\.]\\d{1,2}[\\/\\-\\.]\\d{2,4}\\b",
"\\b\\d{4}[\\/\\-\\.]\\d{2}[\\/\\-\\.]\\d{2}\\b",
"\\b\\w{3,9}\\s\\d{1,2},\\s\\d{4}\\b",
"\\b\\d{1,2}\\s\\w{3,9}\\s\\d{4}\\b",
"\\b\\w{3,9}\\s\\d{4}\\b"
],
"first_name": [
"\\b(?:first[_\\s]?name|First|FName|Given[_\\s]?name|Forename)[:=]?\\s*[A-Z][a-z]+\\b",
"\\b[A-Z][a-z]{1,20}[-'][A-Z][a-z]{1,20}\\b",
"\\b(?:first name|fname)[:=]\\s*['\"]?([A-Za-z-']{2,25})['\"]?\\b"
],
"email": [
"\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b",
"\\bemail[:=]?\\s*[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b",
"\\bE-mail[:=]?\\s*[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b",
"\\bEmailAddress[:=]?\\s*[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b",
"\\b[A-Za-z0-9._%+-]+\\s?\\(at\\)\\s?[A-Za-z0-9.-]+\\.[A-Za-z]{2,}\\b"
],
"last_name": [
"\\b(?:last[_\\s]?name|Last|Surname|LName|Family[_\\s]?name)[:=]?\\s*[A-Z][a-z]+\\b",
"\\b[A-Z][a-z]{1,20}[-'][A-Z][a-z]{1,20}\\b",
"\\b(?:last name|lname)[:=]\\s*['\"]?([A-Za-z-']{2,25})['\"]?\\b"
],
"customer_id": [
"\\bcustomer[_\\s]?id[:=]?\\s*[A-Z0-9]{6,12}\\b",
"\\b(CustID|Client[_\\s]?ID|CID|CustomerNumber)[:=]?\\s*\\w{6,12}\\b",
"\\b(?:CUST|CUS|C)[-_#]\\d{3,10}\\b",
"\\bcustomer[-_]?id[:=]\\s*['\"]?([A-Za-z0-9-_]{3,15})['\"]?\\b",
"\\b[A-Z]{1,3}\\d{4,10}\\b"
],
"employee_id": [
"\\b(?:employee[_\\s]?id|EmpID|Staff[_\\s]?Number|EmployeeNumber|WorkerID)[:=]?\\s*\\w{4,12}\\b",
"\\b(?:E|EMP|EMPLOYEE)[-_#]\\d{3,10}\\b",
"\\b\\d{6,10}\\b",
"\\b[A-Z]\\d{5,8}\\b",
"\\bemployee[-_]?id[:=]\\s*['\"]?([A-Za-z0-9-_]{3,15})['\"]?\\b"
],
"name": [
"\\b(?:name|Full[_\\s]?Name|Person|Contact|Client)[:=]?\\s*[A-Z][a-z]+(?:\\s[A-Z][a-z]+)+\\b",
"\\b[A-Z][a-z]{1,20}\\s+[A-Z][a-z]{1,20}\\b",
"\\b[A-Z][a-z]{1,20}\\s+[A-Z]\\.\\s+[A-Z][a-z]{1,20}\\b",
"\\bname[:=]\\s*['\"]?([A-Za-z \\.-]{2,50})['\"]?\\b",
"\\b[A-Z][a-z]{1,20}\\s+[A-Z][a-z]{1,20}[-][A-Z][a-z]{1,20}\\b"
],
"phone_number": [
"\\(\\d{3}\\)\\s?\\d{3}-\\d{4}",
"\\b\\d{3}-\\d{3}-\\d{4}\\b",
"\\b\\d{10}\\b",
"\\b\\+\\d{1,2}\\s?\\(?\\d{3}\\)?[\\s.-]\\d{3}[\\s.-]\\d{4}\\b",
"\\bPhone[:=]?\\s*\\d{3}[\\s.-]?\\d{3}[\\s.-]?\\d{4}\\b",
"\\b\\d{3}[-. ]?\\d{3}[-. ]?\\d{4}\\b",
"\\b\\(\\d{3}\\)[-. ]?\\d{3}[-. ]?\\d{4}\\b",
"\\b\\+?\\d{1,3}[-. ]?\\d{3}[-. ]?\\d{3}[-. ]?\\d{4}\\b",
"\\b\\d{5}[-. ]?\\d{5}\\b"
],
"ipv4": [
"\\b(?:\\d{1,3}\\.){3}\\d{1,3}\\b",
"\\bIPv4[:=]?\\s*(?:\\d{1,3}\\.){3}\\d{1,3}\\b",
"\\b\\d{1,3}\\s*\\.\\s*\\d{1,3}\\s*\\.\\s*\\d{1,3}\\s*\\.\\s*\\d{1,3}\\b",
"\\b(?:25[0-5]|2[0-4]\\d|[01]?\\d?\\d)(?:\\.(?:25[0-5]|2[0-4]\\d|[01]?\\d?\\d)){3}\\b",
"\\bIP[:=]?\\s*(?:\\d{1,3}\\.){3}\\d{1,3}\\b"
],
"credit_card_number": [
"\\b(?:\\d[ -]*?){13,16}\\b",
"\\b\\d{4}[ -]?\\d{4}[ -]?\\d{4}[ -]?\\d{4}\\b",
"\\b(?:Visa|MasterCard|Amex|Discover)[:=]?\\s*\\d{4}[ -]?\\d{4}[ -]?\\d{4}[ -]?\\d{4}\\b",
"\\bCardNumber[:=]?\\s*\\d{13,16}\\b",
"\\b(?:\\d{4}\\s){3}\\d{4}\\b"
],
"license_plate": [
"\\b[A-Z]{1,3}-\\d{1,4}[A-Z]{0,2}\\b",
"\\b\\d{1,4}[A-Z]{1,3}\\b",
"\\b[A-Z]{1,7}\\d{1,4}\\b",
"\\bPlate[:=]?\\s*[A-Z0-9-]{4,10}\\b",
"\\b[A-Z]{1,3}[-\\s]?\\d{1,4}[-\\s]?[A-Z]{0,3}\\b",
"\\b\\d{1,3}[-\\s]?[A-Z]{1,3}[-\\s]?\\d{1,4}\\b",
"\\blicense[-_]?plate[:=]\\s*['\"]?([A-Za-z0-9-_\\s]{2,10})['\"]?\\b"
],
"address": [
"\\b\\d{1,5}\\s+[A-Z][a-z]+(?:\\s[A-Z][a-z]+)*\\s+(Street|St|Avenue|Ave|Road|Rd|Lane|Ln|Drive|Dr)\\b",
"\\bAddress[:=]?\\s*\\d{1,5}\\s+[A-Za-z\\s]+\\s+(Street|St|Avenue|Ave|Rd|Dr|Blvd)\\b",
"\\b\\d+\\s+[A-Z][a-z]+\\s+(St|Ave|Blvd|Rd)\\b",
"\\b\\d+\\s+[A-Za-z\\s]+\\s+(Street|Avenue|Lane|Road|Drive)\\s*(Apt\\.?\\s*\\d+)?\\b",
"\\b\\d+\\s+[A-Za-z0-9\\s]+,\\s*[A-Za-z\\s]+,\\s*[A-Z]{2}\\s*\\d{5}(-\\d{4})?\\b",
"\\baddress[:=]\\s*['\"]?([A-Za-z0-9 \\.,#-]{10,100})['\"]?\\b",
"\\b\\d{1,5}\\s+[A-Za-z0-9 \\.-]{5,50},?\\s+[A-Za-z ]{2,30},?\\s+[A-Za-z ]{2,20}\\s+\\d{5}(?:-\\d{4})?\\b"
],
"user_name": [
"\\busername[:=]?\\s*[A-Za-z0-9._-]{4,}\\b",
"\\buser[:=]?\\s*[A-Za-z0-9._-]{4,}\\b",
"\\buname[:=]?\\s*[A-Za-z0-9._-]{4,}\\b",
"\\bLogin[:=]?\\s*[A-Za-z0-9._-]{4,}\\b",
"\\baccount[:=]?\\s*[A-Za-z0-9._-]{4,}\\b",
"\\buser[-_]?name[:=]\\s*['\"]?([A-Za-z0-9_-]{3,20})['\"]?\\b",
"\\b@[a-zA-Z0-9_-]{3,20}\\b"
],
"device_identifier": [
"\\bDevice[-_\\s]?ID[:=]?\\s*[A-Fa-f0-9:\\-]{10,}\\b",
"\\bIMEI[:=]?\\s*\\d{14,17}\\b",
"\\bSerial[:=]?\\s*[A-Z0-9]{8,}\\b",
"\\bDevice[:=]?\\s*[A-Z0-9]{6,}\\b",
"\\bMAC[:=]?\\s*(?:[A-Fa-f0-9]{2}[:\\-]){5}[A-Fa-f0-9]{2}\\b"
],
"bank_routing_number": [
"\\bRouting[-_\\s]?Number[:=]?\\s*\\d{9}\\b",
"\\bABA[:=]?\\s*\\d{9}\\b",
"\\bRouting[:=]?\\s*\\d{9}\\b",
"\\bBankRoutingID[:=]?\\s*\\d{9}\\b",
"\\b\\d{9}\\s*(?:Routing)?\\s*Number\\b"
],
"date_time": [
"\\b\\d{4}-\\d{2}-\\d{2}[ T]\\d{2}:\\d{2}:\\d{2}\\b",
"\\b\\d{2}/\\d{2}/\\d{4}\\s+\\d{1,2}:\\d{2}(?:\\s*[APap][Mm])?\\b",
"\\b\\w{3,9}\\s\\d{1,2},\\s\\d{4}\\s+at\\s+\\d{1,2}:\\d{2}\\b",
"\\b\\d{1,2}:\\d{2}\\s*[APap][Mm],?\\s*\\d{1,2}[\\/\\-]\\d{1,2}[\\/\\-]\\d{2,4}\\b",
"\\b\\d{2}:\\d{2}:\\d{2}\\s+\\d{4}-\\d{2}-\\d{2}\\b"
],
"company_name": [
"\\b(?:Inc|LLC|Corp|Ltd|Limited|Corporation|Company)\\b",
"\\bcompany[-_]?name[:=]\\s*['\"]?([A-Za-z0-9 \\.-]{2,50})['\"]?\\b"
],
"unique_identifier": [
"\\bUID[:=]?\\s*[A-Za-z0-9\\-]{8,}\\b",
"\\bID[:=]?\\s*[A-Za-z0-9]{8,}\\b",
"\\bUnique[-_ ]?ID[:=]?\\s*[A-Za-z0-9\\-]{6,}\\b",
"\\bUUID[:=]?\\s*[A-Fa-f0-9\\-]{36}\\b",
"\\b[A-Fa-f0-9]{8}\\-[A-Fa-f0-9]{4}\\-[A-Fa-f0-9]{4}\\-[A-Fa-f0-9]{4}\\-[A-Fa-f0-9]{12}\\b"
],
"biometric_identifier": [
"\\b(?:Fingerprint|IrisScan|FaceID|BiometricID|VoicePrint)[:=]?\\s*[A-Za-z0-9+/=]{20,}\\b",
"\\b(?:fingerprint|retina|iris|face|voice)[-_]?id[:=]\\s*['\"]?([A-Za-z0-9-_]{8,30})['\"]?\\b",
"\\b[A-F0-9]{8}-[A-F0-9]{4}-[A-F0-9]{4}-[A-F0-9]{4}-[A-F0-9]{12}\\b",
"\\bbiometric[-_]?data[:=]\\s*['\"]?([A-Za-z0-9-_]{8,30})['\"]?\\b"
],
"account_number": [
"\\bAccount\\s*Number[:=]?\\s*\\d{6,12}\\b",
"\\bAcct\\.?[:=]?\\s*\\d{6,12}\\b",
"\\bAcc#[:=]?\\s*\\d{6,12}\\b",
"\\bAccount[:=]?\\s*\\d{6,12}\\b",
"\\b\\d{6,12}\\b\\s*Account\\b",
"\\b\\d{8,17}\\b",
"\\b\\d{4}[-\\s]\\d{4}[-\\s]\\d{4}[-\\s]\\d{4}\\b",
"\\baccount[-_]?number[:=]\\s*['\"]?([A-Za-z0-9-_]{5,20})['\"]?\\b",
"\\bacc[-_]?no[:=]\\s*['\"]?([A-Za-z0-9-_]{5,20})['\"]?\\b"
],
"city": [
"\\bCity[:=]?\\s*[A-Z][a-z]+(?:\\s[A-Z][a-z]+)*\\b",
"\\bLocated in\\s+[A-Z][a-z]+(?:\\s[A-Z][a-z]+)*\\b",
"\\bLives in\\s+[A-Z][a-z]+(?:\\s[A-Z][a-z]+)*\\b",
"\\bResides in\\s+[A-Z][a-z]+(?:\\s[A-Z][a-z]+)*\\b",
"\\bcity[:=]\\s*['\"]?([A-Za-z \\.-]{2,30})['\"]?\\b"
],
"certificate_license_number": [
"\\bLicense\\s*No\\.?[:=]?\\s*[A-Za-z0-9\\-]{6,}\\b",
"\\bCert(?:ificate)?\\s*ID[:=]?\\s*[A-Za-z0-9\\-]{6,}\\b",
"\\bLic#[:=]?\\s*[A-Za-z0-9]{6,}\\b",
"\\bReg\\.?\\s*No[:=]?\\s*[A-Za-z0-9]{6,}\\b",
"\\bPermit\\s*Number[:=]?\\s*[A-Za-z0-9]{6,}\\b",
"\\b(?:Certificate|License)\\s+(?:Number|No|#)[:=]\\s*['\"]?([A-Za-z0-9-_]{5,20})['\"]?\\b",
"\\b[A-Z]{1,3}-\\d{5,10}\\b",
"\\b\\d{2}-\\d{5,8}\\b",
"\\b[A-Z]\\d{6,10}[A-Z]\\b"
],
"time": [
"\\b\\d{1,2}:\\d{2}\\s*[APap][Mm]\\b",
"\\b\\d{2}:\\d{2}:\\d{2}\\b",
"\\bTime[:=]?\\s*\\d{1,2}:\\d{2}\\b",
"\\b\\d{1,2}\\s*(AM|PM)\\b",
"\\b\\d{2}h\\d{2}\\b"
],
"postcode": [
"\\b\\d{5}(?:-\\d{4})?\\b",
"\\bZip[:=]?\\s*\\d{5}(-\\d{4})?\\b",
"\\bPostal\\s*Code[:=]?\\s*\\d{5}(?:-\\d{4})?\\b",
"\\b\\d{3}\\s?\\d{2}\\b",
"\\b\\d{4,6}\\b"
],
"vehicle_identifier": [
"\\bVIN[:=]?\\s*[A-HJ-NPR-Z0-9]{17}\\b",
"\\bVehicle\\s*ID[:=]?\\s*[A-HJ-NPR-Z0-9]{17}\\b",
"\\bChassis\\s*Number[:=]?\\s*[A-HJ-NPR-Z0-9]{17}\\b",
"\\bEngine\\s*No\\.?[:=]?\\s*[A-Za-z0-9]{6,}\\b",
"\\bV\\.?I\\.?N\\.?[:=]?\\s*[A-HJ-NPR-Z0-9]{17}\\b",
"\\b[A-HJ-NPR-Z0-9]{17}\\b",
"\\bVIN[:=]\\s*['\"]?([A-HJ-NPR-Z0-9]{17})['\"]?\\b",
"\\bvehicle[-_]?id[:=]\\s*['\"]?([A-Za-z0-9-_]{5,20})['\"]?\\b"
],
"coordinate": [
"\\b[-+]?\\d{1,3}\\.\\d+,\\s*[-+]?\\d{1,3}\\.\\d+\\b",
"\\bLat[:=]?\\s*-?\\d{1,2}\\.\\d+\\s*Long[:=]?\\s*-?\\d{1,3}\\.\\d+\\b",
"\\bLatitude[:=]?\\s*-?\\d{1,2}\\.\\d+\\s*Longitude[:=]?\\s*-?\\d{1,3}\\.\\d+\\b",
"\\b\\(\\s*-?\\d{1,3}\\.\\d+\\s*,\\s*-?\\d{1,3}\\.\\d+\\s*\\)\\b",
"\\bGPS\\s*Coord(?:inate)?s?[:=]?\\s*-?\\d{1,3}\\.\\d+\\s*,\\s*-?\\d{1,3}\\.\\d+\\b"
],
"country": [
"\\bCountry[:=]?\\s*[A-Z][a-z]+(?:\\s[A-Z][a-z]+)*\\b",
"\\bNationality[:=]?\\s*[A-Z][a-z]+\\b",
"\\bFrom\\s+[A-Z][a-z]+(?:\\s[A-Z][a-z]+)*\\b",
"\\bCitizenship[:=]?\\s*[A-Z][a-z]+\\b",
"\\bResident of\\s+[A-Z][a-z]+\\b",
"\\b[A-Z]{2,3}\\b",
"\\bcountry[:=]\\s*['\"]?([A-Za-z ]{2,56})['\"]?\\b"
],
"api_key": [
"\\b(?:Key|Token|Secret|Auth)[:=]?\\s*[A-Za-z0-9\\-_]{20,}\\b",
"\\b(?:[A-Za-z0-9+/]{40,})\\b",
"\\b[A-Za-z0-9]{32,}\\b",
"\\b(?:key|api[-_]?key|access[-_]?token)[-_]?['\"]?([A-Za-z0-9_\\-\\.]{16,})['\"]?\\b",
"\\b[a-zA-Z0-9_]{8,}\\.[a-zA-Z0-9_]{8,}\\.[a-zA-Z0-9_-]{8,}\\b"
],
"ipv6": [
"(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}",
"((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)::((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)",
"::1",
"\\bIPv6[:=]?\\s*['\"]?([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}['\"]?\\b"
],
"password": [
"\\bPassword[:=]?\\s*.+\\b",
"\\bPwd[:=]?\\s*[^\\s]{6,}\\b",
"\\bPass[:=]?\\s*[^\\s]{6,}\\b",
"\\bUserPass[:=]?\\s*[^\\s]{6,}\\b",
"\\bSecret[:=]?\\s*[^\\s]{6,}\\b",
"\\b(?:password|pwd|pass)[:=]\\s*['\"]?([^\\s\"']{6,30})['\"]?\\b"
],
"health_plan_beneficiary_number": [
"\\bHPBN[:=]?\\s*\\d{9,12}\\b",
"\\bBeneficiary\\s*ID[:=]?\\s*\\d{9,12}\\b",
"\\bHealth\\s*Plan\\s*No\\.?[:=]?\\s*\\d{9,12}\\b",
"\\bPlanID[:=]?\\s*\\d{9,12}\\b",
"\\bInsurance[:=]?\\s*\\d{9,12}\\b",
"\\b[A-Z]\\d{8}\\b",
"\\b\\d{3}-\\d{2}-\\d{4}\\b",
"\\bhealth[-_]?plan[-_]?(?:number|id|no)[:=]\\s*['\"]?([A-Za-z0-9-_]{5,20})['\"]?\\b",
"\\b[A-Z]{1,5}\\d{6,12}\\b"
],
"national_id": [
"\\bNID[:=]?\\s*\\d{6,14}\\b",
"\\bNational\\s*ID[:=]?\\s*\\d{6,14}\\b",
"\\bNatID[:=]?\\s*\\d{6,14}\\b",
"\\bCitizen\\s*ID[:=]?\\s*\\d{6,14}\\b",
"\\bIdentity\\s*Number[:=]?\\s*\\d{6,14}\\b"
],
"tax_id": [
"\\bTIN[:=]?\\s*\\d{9}\\b",
"\\bTax\\s*ID[:=]?\\s*\\d{9}\\b",
"\\bEIN[:=]?\\s*\\d{2}-\\d{7}\\b",
"\\bTaxpayer\\s*Number[:=]?\\s*\\d{9}\\b",
"\\b\\d{3}-\\d{2}-\\d{4}\\s*\\(Tax\\)\\b"
],
"url": [
"\\bhttps?:\\/\\/[^\\s]+\\b",
"\\bwww\\.[A-Za-z0-9\\-\\.]+\\.[a-z]{2,}\\b",
"\\bURL[:=]?\\s*https?:\\/\\/[^\\s]+\\b",
"\\bLink[:=]?\\s*https?:\\/\\/[^\\s]+\\b",
"\\b(?:http|https):\\/\\/\\S+\\b"
],
"state": [
"\\bState[:=]?\\s*[A-Z][a-z]+\\b",
"\\bLives in\\s+[A-Z][a-z]+\\b",
"\\bLocated in\\s+[A-Z][a-z]+\\b",
"\\b[A-Z]{2}\\s*State\\b",
"\\b\\([A-Z]{2}\\)\\b",
"\\bstate[:=]\\s*['\"]?([A-Za-z ]{2,20})['\"]?\\b"
],
"swift_bic": [
"\\bSWIFT[:=]?\\s*[A-Z]{6}[A-Z0-9]{2}([A-Z0-9]{3})?\\b",
"\\bBIC[:=]?\\s*[A-Z]{6}[A-Z0-9]{2,5}\\b",
"\\bSWIFT-BIC[:=]?\\s*[A-Z0-9]{8,11}\\b",
"\\bSWIFT Code[:=]?\\s*[A-Z0-9]{8,11}\\b",
"\\b\\b[A-Z]{4}[A-Z]{2}[A-Z0-9]{2}([A-Z0-9]{3})?\\b"
],
"cvv": [
"\\bCVV[:=]?\\s*\\d{3,4}\\b",
"\\bSecurity\\s*Code[:=]?\\s*\\d{3,4}\\b",
"\\bCVC[:=]?\\s*\\d{3,4}\\b",
"\\bCard\\s*Verification[:=]?\\s*\\d{3,4}\\b",
"\\b\\d{3,4}\\s*\\(CVV\\)\\b",
"\\b\\d{3,4}\\b",
"\\b(?:CVV|CVC|CVV2|CID)[:=]\\s*['\"]?(\\d{3,4})['\"]?\\b",
"\\bsecurity code[:=]\\s*['\"]?(\\d{3,4})['\"]?\\b"
],
"pin": [
"\\bPIN[:=]?\\s*\\d{4,6}\\b",
"\\bPin\\s*Code[:=]?\\s*\\d{4,6}\\b",
"\\bAccess\\s*Code[:=]?\\s*\\d{4,6}\\b",
"\\b\\d{4,6}\\s*\\(PIN\\)\\b",
"\\bPasscode[:=]?\\s*\\d{4,6}\\b"
]
}
Loading