1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
|
// Package detector scans text for leaked secrets and credentials.
package detector
import (
"fmt"
"sort"
"strings"
)
// PiiType identifies the kind of finding.
// Its string value is embedded in generated placeholders (for example
// "[SECRET_1]") and is compared against the upper-cased detector names passed
// to NewScanner.
type PiiType string
const (
// PiiSecret is the type under which the secrets detector is registered.
PiiSecret PiiType = "SECRET"
)
// piiPriority controls overlap resolution: higher value wins.
// A type that has no entry reads as 0, the map's zero value, and therefore
// loses against every listed type.
var piiPriority = map[PiiType]int{
PiiSecret: 6,
}
// Finding is a single detection result.
type Finding struct {
// Type is the category the finding was reported under.
Type PiiType
// Start and End are byte offsets into the scanned text. End is exclusive:
// ScanWithWhitelist replaces the bytes in [Start, End) with Placeholder.
Start int
End int
// Text is the matched text. Whitelist tokens are checked against it and
// placeholders are keyed on it.
Text string
// Confidence is not read anywhere in this file.
// NOTE(review): presumably a detector-supplied score; scale and range are
// not visible here — confirm against the detectors.
Confidence float64
// Placeholder is the "[TYPE_N]" token substituted for the match. It is
// assigned by ScanWithWhitelist.
Placeholder string
RuleID string // only set for SECRET findings
}
// Scanner runs enabled detectors over text.
// Build one with NewScanner.
type Scanner struct {
// enabledTypes is the explicit set of detector types to run. It is only
// consulted when allTypes is false.
enabledTypes map[PiiType]bool
// allTypes, when true, enables every detector regardless of enabledTypes.
allTypes bool
}
// NewScanner creates a Scanner restricted to the named detectors.
//
// Names are matched case-insensitively and surrounding whitespace is ignored,
// so "secret", " Secret " and "SECRET" all enable PiiSecret. Blank entries are
// skipped. When detectors is empty, or holds nothing but blank entries (as
// produced by strings.Split("", ",")), every detector is enabled, so an unset
// configuration value never silently switches scanning off.
func NewScanner(detectors []string) *Scanner {
	enabled := make(map[PiiType]bool, len(detectors))
	for _, name := range detectors {
		name = strings.ToUpper(strings.TrimSpace(name))
		if name == "" {
			continue
		}
		enabled[PiiType(name)] = true
	}
	// No usable name means "no restriction", not "nothing enabled".
	if len(enabled) == 0 {
		return &Scanner{allTypes: true}
	}
	return &Scanner{enabledTypes: enabled}
}
// isEnabled reports whether the detector registered for t should run: always
// when the scanner enables every detector, otherwise only when t is in the
// explicit set.
func (s *Scanner) isEnabled(t PiiType) bool {
	return s.allTypes || s.enabledTypes[t]
}
// Scan anonymises text with the enabled detectors and no whitelist.
// It returns the rewritten text together with the individual findings, which
// are ordered by ascending start position.
func (s *Scanner) Scan(text string) (string, []Finding) {
	var noWhitelist []string
	anonymised, findings := s.ScanWithWhitelist(text, noWhitelist)
	return anonymised, findings
}
// ScanWithWhitelist behaves like Scan, but suppresses findings whose matched
// text contains any whitelist token (case-insensitive).
//
// The enabled detectors run over text, whitelisted findings are dropped, and
// overlapping findings are reduced to a non-overlapping set. Each surviving
// finding is replaced in the text by a placeholder of the form "[TYPE_N]".
// Identical matched text always receives the same placeholder; N is bumped
// per type each time a previously unseen text is encountered, in order of
// appearance.
//
// It returns the anonymised text and the findings sorted by ascending start
// position, each with its Placeholder filled in. When nothing is found, or
// everything is whitelisted, text is returned unchanged with a nil slice.
func (s *Scanner) ScanWithWhitelist(text string, whitelist []string) (string, []Finding) {
	detectors := []struct {
		kind PiiType
		run  func(string) []Finding
	}{
		{PiiSecret, detectSecrets},
	}

	var found []Finding
	for _, d := range detectors {
		if s.isEnabled(d.kind) {
			found = append(found, d.run(text)...)
		}
	}
	if len(found) == 0 {
		return text, nil
	}
	found = filterWhitelisted(found, whitelist)
	if len(found) == 0 {
		return text, nil
	}

	resolved := resolveOverlaps(found)
	// The single pass below walks the text left to right, so the findings
	// must be in ascending order. The stable sort leaves an already ordered
	// slice untouched.
	sort.SliceStable(resolved, func(i, j int) bool { return resolved[i].Start < resolved[j].Start })

	counters := make(map[PiiType]int)
	placeholderFor := make(map[string]string, len(resolved))

	// Build the output once instead of splicing the buffer per finding:
	// copy the gap before each finding, then its placeholder.
	var out strings.Builder
	out.Grow(len(text))
	cursor := 0 // byte offset in text up to which output has been written
	for i := range resolved {
		f := &resolved[i]
		ph, seen := placeholderFor[f.Text]
		if !seen {
			counters[f.Type]++
			ph = fmt.Sprintf("[%s_%d]", string(f.Type), counters[f.Type])
			placeholderFor[f.Text] = ph
		}
		f.Placeholder = ph

		out.WriteString(text[cursor:f.Start])
		out.WriteString(ph)
		cursor = f.End
	}
	out.WriteString(text[cursor:])

	return out.String(), resolved
}
// filterWhitelisted drops every finding whose matched text contains at least
// one whitelist token. Tokens are lower-cased and trimmed first, blank tokens
// are ignored, and the containment check is case-insensitive. When no usable
// token remains, the input slice is returned as is.
func filterWhitelisted(findings []Finding, whitelist []string) []Finding {
	needles := make([]string, 0, len(whitelist))
	for _, raw := range whitelist {
		if tok := strings.TrimSpace(strings.ToLower(raw)); tok != "" {
			needles = append(needles, tok)
		}
	}
	if len(needles) == 0 {
		return findings
	}

	kept := make([]Finding, 0, len(findings))
nextFinding:
	for i := range findings {
		haystack := strings.ToLower(findings[i].Text)
		for _, tok := range needles {
			if strings.Contains(haystack, tok) {
				// One hit is enough to suppress the finding.
				continue nextFinding
			}
		}
		kept = append(kept, findings[i])
	}
	return kept
}
// resolveOverlaps reduces findings to a set in which no two spans overlap and
// returns it sorted by ascending Start. Spans are treated as half-open
// [Start, End), so findings that merely touch are both kept.
//
// Candidates are considered from strongest to weakest: higher piiPriority
// first, then the longer span, then the earlier Start. A candidate is kept
// only if it does not overlap a finding that has already been kept. This
// ordering matters: resolving pairwise in position order can discard a
// finding in favour of a neighbour that is itself discarded later, which
// would leave the first finding's text unredacted.
//
// The input slice is not modified. An empty input yields nil.
func resolveOverlaps(findings []Finding) []Finding {
	if len(findings) == 0 {
		return nil
	}

	// Rank a copy so the caller's slice keeps its order.
	ranked := make([]Finding, len(findings))
	copy(ranked, findings)
	sort.SliceStable(ranked, func(i, j int) bool {
		a, b := ranked[i], ranked[j]
		if pa, pb := piiPriority[a.Type], piiPriority[b.Type]; pa != pb {
			return pa > pb
		}
		if la, lb := a.End-a.Start, b.End-b.Start; la != lb {
			return la > lb
		}
		return a.Start < b.Start
	})

	// kept stays sorted by Start and free of overlaps, so a candidate only
	// has to be checked against its two would-be neighbours.
	kept := make([]Finding, 0, len(ranked))
	for _, f := range ranked {
		at := sort.Search(len(kept), func(i int) bool { return kept[i].Start >= f.Start })
		if at > 0 && kept[at-1].End > f.Start {
			continue // runs into the finding on its left
		}
		if at < len(kept) && kept[at].Start < f.End {
			continue // runs into the finding on its right
		}
		// Insert at position at, shifting the tail one slot to the right.
		kept = append(kept, Finding{})
		copy(kept[at+1:], kept[at:])
		kept[at] = f
	}
	return kept
}
|