diff --git a/README.md b/README.md index c64f8fb5..4df19ffd 100644 --- a/README.md +++ b/README.md @@ -254,6 +254,11 @@ Raspberry Pi. **Please Note(3)**: If need to do a rebuild and want to clean up everything, run the following command: +**Please Note(4)**: To build the CROWler VDI docker image, it's required to +build also Selenium (don't worry everything is automatic), however you need +to ensure that GNU Make is installed on your system. That is required to +build selenium images (nothing to do with the CROWler itself). + ```bash ./docker-rebuild.sh ``` diff --git a/go.mod b/go.mod index 924f8511..8cc5f230 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,7 @@ require ( require ( github.com/Ullaakut/nmap/v3 v3.0.3 github.com/jmoiron/sqlx v1.4.0 + github.com/spaolacci/murmur3 v1.1.0 golang.org/x/crypto v0.24.0 ) diff --git a/go.sum b/go.sum index 23ac9bfa..324507f7 100644 --- a/go.sum +++ b/go.sum @@ -95,6 +95,8 @@ github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBO github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ= github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= +github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= +github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= @@ -169,6 +171,8 @@ golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9sn golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= 
golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= diff --git a/pkg/common/enconding.go b/pkg/common/enconding.go new file mode 100644 index 00000000..d439b801 --- /dev/null +++ b/pkg/common/enconding.go @@ -0,0 +1,11 @@ +package common + +import ( + "encoding/base64" +) + +// Base64Encode encodes a string to base64, this may be required by some +// configurations. +func Base64Encode(data string) string { + return base64.StdEncoding.EncodeToString([]byte(data)) +} diff --git a/pkg/config/config.go b/pkg/config/config.go index cedb2afb..5f619a98 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -253,9 +253,25 @@ func NewConfig() *Config { SSLMode: "disable", }, HTTPHeaders: HTTPConfig{ - Enabled: true, - Timeout: 60, - SSLDiscovery: true, + Enabled: true, + Timeout: 60, + SSLDiscovery: SSLScoutConfig{ + Enabled: true, + JARM: false, + JA3: false, + JA3S: true, + HASSH: false, + HASSHServer: true, + TLSH: true, + SimHash: true, + MinHash: true, + BLAKE2: true, + SHA256: true, + CityHash: true, + MurmurHash: true, + CustomTLS: true, + }, + Proxies: []SOCKSProxy{}, }, NetworkInfo: NetworkInfo{ DNS: DNSConfig{ @@ -1014,7 +1030,7 @@ func IsEmpty(config Config) bool { return false } - if config.HTTPHeaders != (HTTPConfig{}) { + if !config.HTTPHeaders.IsEmpty() { return false } @@ -1063,7 +1079,7 @@ func (ssc *ServiceScoutConfig) IsEmpty() bool { return true } -// isEmpty checks if the given ExecutionPlanItem is empty. +// IsEmpty checks if the given ExecutionPlanItem is empty. 
func (ep *ExecutionPlanItem) IsEmpty() bool { if ep == nil { return true @@ -1076,7 +1092,7 @@ func (ep *ExecutionPlanItem) IsEmpty() bool { return false } -// isEmpty checks if the given SourceConfig is empty. +// IsEmpty checks if the given SourceConfig is empty. func (sc *SourceConfig) IsEmpty() bool { if sc == nil { return true @@ -1089,6 +1105,7 @@ func (sc *SourceConfig) IsEmpty() bool { return true } +// IsEmpty checks if the given Config is empty. func (cfg *Config) IsEmpty() bool { if cfg == nil { return true @@ -1122,7 +1139,7 @@ func (cfg *Config) IsEmpty() bool { return false } - if cfg.HTTPHeaders != (HTTPConfig{}) { + if !cfg.HTTPHeaders.IsEmpty() { return false } @@ -1144,3 +1161,112 @@ func (cfg *Config) IsEmpty() bool { return true } + +// IsEmpty checks if the given DNSConfig is empty. +func (dc *DNSConfig) IsEmpty() bool { + if dc == nil { + return true + } + + if dc.Enabled { + return false + } + + if dc.Timeout != 0 { + return false + } + + if dc.RateLimit != "" { + return false + } + + return true +} + +// IsEmpty checks if the given WHOISConfig is empty. +func (wc *WHOISConfig) IsEmpty() bool { + if wc == nil { + return true + } + + if wc.Enabled { + return false + } + + if wc.Timeout != 0 { + return false + } + + if wc.RateLimit != "" { + return false + } + + return true +} + +// IsEmpty checks if the given NetLookupConfig is empty. +func (nlc *NetLookupConfig) IsEmpty() bool { + if nlc == nil { + return true + } + + if nlc.Enabled { + return false + } + + if nlc.Timeout != 0 { + return false + } + + if nlc.RateLimit != "" { + return false + } + + return true +} + +// IsEmpty checks if the given GeoLookupConfig is empty. +func (glc *GeoLookupConfig) IsEmpty() bool { + if glc == nil { + return true + } + + if glc.Enabled { + return false + } + + if glc.Type != "" { + return false + } + + if glc.DBPath != "" { + return false + } + + return true +} + +// IsEmpty checks if the given HTTPConfig is empty. 
+func (hc *HTTPConfig) IsEmpty() bool { + if hc == nil { + return true + } + + if hc.Enabled { + return false + } + + if hc.Timeout != 0 { + return false + } + + if hc.SSLDiscovery != (SSLScoutConfig{}) { + return false + } + + if len(hc.Proxies) != 0 { + return false + } + + return true +} diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index b6c6e1fa..41bb5459 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -1204,7 +1204,7 @@ func TestConfigString(t *testing.T) { } // Define the expected string representation of the config - expected := "Config{Remote: {https://example.com /api 8080 us-west-1 mytoken 0 }, Database: { 0 testuser testpassword 0 0 }, Crawler: {0 0 0 false false 0 0 0 0 0 0 false false false false false false 0 false}, API: { 0 0 false false false 0 0 0 false}, Selenium: [{ chrome 4444 false false }], RulesetsSchemaPath: path/to/schema, Rulesets: [], ImageStorageAPI: { 0 0 }, FileStorageAPI: { 0 0 }, HTTPHeaders: {false 0 false false}, NetworkInfo: {{false 0 } {false 0 } {false 0 } {false 0 { 0} false false false false false false false false [] [] [] 0 0 0 false 0 false false 0 [] []} {false 0 } { }}, OS: linux, DebugLevel: 1}" + expected := "Config{Remote: {https://example.com /api 8080 us-west-1 mytoken 0 }, Database: { 0 testuser testpassword 0 0 }, Crawler: {0 0 0 false false 0 0 0 0 0 0 false false false false false false 0 false}, API: { 0 0 false false false 0 0 0 false}, Selenium: [{ chrome 4444 false false }], RulesetsSchemaPath: path/to/schema, Rulesets: [], ImageStorageAPI: { 0 0 }, FileStorageAPI: { 0 0 }, HTTPHeaders: {false 0 false {false false false false false false false false false false false false false false} []}, NetworkInfo: {{false 0 } {false 0 } {false 0 } {false 0 { 0} false false false false false false false false [] [] [] 0 0 0 false 0 false false 0 [] []} {false 0 } { }}, OS: linux, DebugLevel: 1}" // Call the String method on the config result := config.String() diff --git 
a/pkg/config/types.go b/pkg/config/types.go index 8dbbefc8..86f60ca7 100644 --- a/pkg/config/types.go +++ b/pkg/config/types.go @@ -102,10 +102,34 @@ type GeoLookupConfig struct { } type HTTPConfig struct { - Enabled bool `yaml:"enabled"` - Timeout int `yaml:"timeout"` - FollowRedirects bool `yaml:"follow_redirects"` - SSLDiscovery bool `yaml:"ssl_discovery"` + Enabled bool `yaml:"enabled"` + Timeout int `yaml:"timeout"` + FollowRedirects bool `yaml:"follow_redirects"` + SSLDiscovery SSLScoutConfig `yaml:"ssl_discovery"` + Proxies []SOCKSProxy `yaml:"proxies"` +} + +type SSLScoutConfig struct { + Enabled bool `yaml:"enabled"` + JARM bool `yaml:"jarm"` + JA3 bool `yaml:"ja3"` + JA3S bool `yaml:"ja3s"` + HASSH bool `yaml:"hassh"` + HASSHServer bool `yaml:"hassh_server"` + TLSH bool `yaml:"tlsh"` + SimHash bool `yaml:"simhash"` + MinHash bool `yaml:"minhash"` + BLAKE2 bool `yaml:"blake2"` + SHA256 bool `yaml:"sha256"` + CityHash bool `yaml:"cityhash"` + MurmurHash bool `yaml:"murmurhash"` + CustomTLS bool `yaml:"custom_tls"` +} + +type SOCKSProxy struct { + Address string + Username string + Password string } // ServiceScoutConfig represents a structured configuration for an Nmap scan. 
diff --git a/pkg/crawler/action_rules.go b/pkg/crawler/action_rules.go index c57f0a9c..aaaf5251 100644 --- a/pkg/crawler/action_rules.go +++ b/pkg/crawler/action_rules.go @@ -309,11 +309,93 @@ func executeActionSwitchWindow(r *rules.ActionRule, wd *selenium.WebDriver) erro return (*wd).SwitchWindow(r.Value) } -// TODO: Implement this function (this requires RBee service running on the VDI) -// -// Scroll to an element using Rbee -func executeActionScrollToElement(_ *rules.ActionRule, _ *selenium.WebDriver) error { - return nil +// executeActionScrollToElement is responsible for executing a "scroll to element" action +func executeActionScrollToElement(r *rules.ActionRule, wd *selenium.WebDriver) error { + // Find the element + wdf, selector, err := findElementBySelectorType(wd, r.Selectors) + if err != nil { + cmn.DebugMsg(cmn.DbgLvlDebug3, "No element '%v' found.", err) + err = nil + } + + // If the element is found, attempt to scroll to it using Rbee + if wdf != nil { + loc, err := wdf.Location() + if err != nil { + return fmt.Errorf("failed to get element location: %v", err) + } + + // JavaScript to send a POST request to Rbee for scrolling to the element + jsScript := fmt.Sprintf(` + (function() { + var xhr = new XMLHttpRequest(); + xhr.open("POST", "http://localhost:3000/v1/rb", true); + xhr.setRequestHeader("Content-Type", "application/json;charset=UTF-8"); + var data = JSON.stringify({ + "Action": "moveMouse", + "X": %d, + "Y": %d + }); + xhr.onreadystatechange = function () { + if (xhr.readyState === 4 && xhr.status === 200) { + var scrollXhr = new XMLHttpRequest(); + scrollXhr.open("POST", "http://localhost:3000/v1/rb", true); + scrollXhr.setRequestHeader("Content-Type", "application/json;charset=UTF-8"); + var scrollData = JSON.stringify({ + "Action": "scroll" + }); + scrollXhr.onreadystatechange = function () { + if (scrollXhr.readyState === 4 && scrollXhr.status === 200) { + console.log("done."); + return true; + } else if (scrollXhr.readyState === 4) { 
+ console.error("Failed: " + scrollXhr.responseText); + return false; + } + }; + scrollXhr.send(scrollData); + } else if (xhr.readyState === 4) { + console.error("Failed: " + xhr.responseText); + return false; + } + }; + xhr.send(data); + })(); + `, loc.X, loc.Y) + + // Execute the JavaScript in the browser context + success, err := (*wd).ExecuteScript(jsScript, nil) + if err == nil && success == true { + cmn.DebugMsg(cmn.DbgLvlDebug3, "Scroll to element action executed successfully using Rbee") + return nil + } else { + cmn.DebugMsg(cmn.DbgLvlDebug3, "Failed to execute scroll to element using Rbee, falling back to Selenium") + } + + // Fall back to using Selenium's ExecuteScript method + scrollScript := fmt.Sprintf(` + (function() { + var element = document.querySelector("%s"); + if (element) { + element.scrollIntoView({ behavior: 'smooth', block: 'center' }); + return true; + } else { + return false; + } + })(); + `, selector.Value) + + success, err = (*wd).ExecuteScript(scrollScript, nil) + if err != nil { + return fmt.Errorf("failed to scroll to element using Selenium: %v", err) + } + + if success != true { + return fmt.Errorf("element not found for scrolling") + } + } + + return err } func executeActionScrollByAmount(r *rules.ActionRule, wd *selenium.WebDriver) error { @@ -346,6 +428,8 @@ func executeWaitCondition(ctx *processContext, r *rules.WaitCondition, wd *selen // executeActionClick is responsible for executing a "click" action func executeActionClick(r *rules.ActionRule, wd *selenium.WebDriver) error { + var err error + // Find the element wdf, _, err := findElementBySelectorType(wd, r.Selectors) if err != nil { @@ -353,11 +437,66 @@ func executeActionClick(r *rules.ActionRule, wd *selenium.WebDriver) error { err = nil } - // If the element is found, click it + // If the element is found, attempt to move the mouse and click using Rbee if wdf != nil { - err := wdf.Click() + loc, err := wdf.Location() + if err != nil { + return fmt.Errorf("failed to get 
element location: %v", err) + } + + // JavaScript to send a POST request to Rbee for mouse move and click + jsScript := fmt.Sprintf(` + (function() { + var xhr = new XMLHttpRequest(); + xhr.open("POST", "http://localhost:3000/v1/rb", true); + xhr.setRequestHeader("Content-Type", "application/json;charset=UTF-8"); + var data = JSON.stringify({ + "Action": "moveMouse", + "X": %d, + "Y": %d + }); + xhr.onreadystatechange = function () { + if (xhr.readyState === 4 && xhr.status === 200) { + var clickXhr = new XMLHttpRequest(); + clickXhr.open("POST", "http://localhost:3000/v1/rb", true); + clickXhr.setRequestHeader("Content-Type", "application/json;charset=UTF-8"); + var clickData = JSON.stringify({ + "Action": "click" + }); + clickXhr.onreadystatechange = function () { + if (clickXhr.readyState === 4 && clickXhr.status === 200) { + console.log("done."); + return true; // Clicked successfully + } else if (clickXhr.readyState === 4) { + console.error("Failed: " + clickXhr.responseText); + return false; // Failed to click + } + }; + clickXhr.send(clickData); + } else if (xhr.readyState === 4) { + console.error("Failed: " + xhr.responseText); + return false; // Failed to move mouse + } + }; + xhr.send(data); + })(); + `, loc.X, loc.Y) + + // Execute the JavaScript in the browser context + var success interface{} + success, err = (*wd).ExecuteScript(jsScript, nil) + if err == nil && success == true { + cmn.DebugMsg(cmn.DbgLvlDebug3, "Mouse move and click action executed successfully using Rbee") + return nil + } else { + cmn.DebugMsg(cmn.DbgLvlDebug3, "Failed to execute mouse move and click using Rbee, falling back to Selenium") + } + + // Fall back to using Selenium's Click method + err = wdf.Click() return err } + return err } @@ -373,7 +512,54 @@ func executeMoveToElement(r *rules.ActionRule, wd *selenium.WebDriver) error { return err } } - script := ` + + // Get the location of the element + loc, err := wdf.Location() + if err != nil { + cmn.DebugMsg(cmn.DbgLvlError, 
"getting element location: %v", err) + } + + // Get the size of the element (optional, but useful for debugging) + size, err := wdf.Size() + if err != nil { + cmn.DebugMsg(cmn.DbgLvlError, "getting element size: %v", err) + } + + // Output the element's location and size for debugging + cmn.DebugMsg(cmn.DbgLvlDebug3, "Element location: (%d, %d)\n", loc.X, loc.Y) + cmn.DebugMsg(cmn.DbgLvlDebug3, "Element size: (width: %d, height: %d)\n", size.Width, size.Height) + + script := fmt.Sprintf(` + (function() { + var xhr = new XMLHttpRequest(); + xhr.open("POST", "http://localhost:3000/v1/rb", true); + xhr.setRequestHeader("Content-Type", "application/json;charset=UTF-8"); + var data = JSON.stringify({ + "Action": "moveMouse", + "X": %d, + "Y": %d + }); + xhr.onreadystatechange = function () { + if (xhr.readyState === 4 && xhr.status === 200) { + console.log("done."); + } else if (xhr.readyState === 4) { + console.error("Failed: " + xhr.responseText); + } + }; + xhr.send(data); + })(); + `, loc.X, loc.Y) + + // Move the mouse to the element using Rbee + if err == nil { + // If err is nill then we have all the information we need + // to use human-simulation to move the mouse to the element + _, err = (*wd).ExecuteScript(script, nil) + } + if err != nil { + cmn.DebugMsg(cmn.DbgLvlError, "executing human-simulation script: %v", err) + // Moving human way failed, use Selenium way + script = ` var elem = document.getElementById('` + id + `'); var evt = new MouseEvent('mousemove', { bubbles: true, @@ -383,8 +569,13 @@ func executeMoveToElement(r *rules.ActionRule, wd *selenium.WebDriver) error { view: window }); elem.dispatchEvent(evt); - ` - _, err = (*wd).ExecuteScript(script, nil) + ` + // Move the mouse to the element using Rbee + _, err = (*wd).ExecuteScript(script, nil) + if err != nil { + cmn.DebugMsg(cmn.DbgLvlError, "executing teleport script: %v", err) + } + } return err } @@ -401,11 +592,41 @@ func executeActionScroll(r *rules.ActionRule, wd *selenium.WebDriver) 
error { attribute = value } - // Use Sprintf to dynamically create the script string with the attribute value - script := fmt.Sprintf("window.scrollTo(0, %s)", attribute) + // JavaScript to send a POST request to Rbee + jsScript := fmt.Sprintf(` + (function() { + var xhr = new XMLHttpRequest(); + xhr.open("POST", "http://localhost:3000/v1/rb", true); + xhr.setRequestHeader("Content-Type", "application/json;charset=UTF-8"); + var data = JSON.stringify({ + "Action": "scroll", + "Value": "%s" + }); + xhr.onreadystatechange = function () { + if (xhr.readyState === 4 && xhr.status === 200) { + console.log("done."); + return true; + } else if (xhr.readyState === 4) { + console.error("Failed: " + xhr.responseText); + return false; + } + }; + xhr.send(data); + })(); + `, attribute) - // Scroll the page - _, err := (*wd).ExecuteScript(script, nil) + // Execute the JavaScript in the browser context + success, err := (*wd).ExecuteScript(jsScript, nil) + if err == nil && success == true { + cmn.DebugMsg(cmn.DbgLvlDebug3, "Scroll action executed successfully using Rbee") + return nil + } else { + cmn.DebugMsg(cmn.DbgLvlDebug3, "Failed to execute scroll using Rbee, falling back to Selenium") + } + + // Fall back to using Selenium's ExecuteScript method + script := fmt.Sprintf("window.scrollTo(0, %s)", attribute) + _, err = (*wd).ExecuteScript(script, nil) return err } @@ -434,18 +655,120 @@ func executeActionJS(ctx *processContext, r *rules.ActionRule, wd *selenium.WebD } // executeActionInput is responsible for executing an "input" action +// Note from Paolo: +// This may looks complex, because it is a complex problem to solve! +// This function tries to move the mouse (human-like) to the element, +// clicks (generating a system level event) on it, and then inputs +// the text using Rbee. 'cause that's what us human do and tools like +// Selenium don't. 
func executeActionInput(r *rules.ActionRule, wd *selenium.WebDriver) error { + var err error + // Find the element wdf, selector, err := findElementBySelectorType(wd, r.Selectors) if err != nil { cmn.DebugMsg(cmn.DbgLvlDebug3, "No element '%v' found.", err) - err = nil + return nil } - // If the element is found, input the text + // If the element is found, attempt to input the text using Rbee if wdf != nil { - err = wdf.SendKeys(selector.Value) + loc, err := wdf.Location() + if err != nil { + return fmt.Errorf("failed to get element location: %v", err) + } + + // JavaScript to send a POST request to Rbee for mouse move and click + jsScriptMoveAndClick := fmt.Sprintf(` + (function() { + var xhr = new XMLHttpRequest(); + xhr.open("POST", "http://localhost:3000/v1/rb", true); + xhr.setRequestHeader("Content-Type", "application/json;charset=UTF-8"); + var data = JSON.stringify({ + "Action": "moveMouse", + "X": %d, + "Y": %d + }); + xhr.onreadystatechange = function () { + if (xhr.readyState === 4 && xhr.status === 200) { + var clickXhr = new XMLHttpRequest(); + clickXhr.open("POST", "http://localhost:3000/v1/rb", true); + clickXhr.setRequestHeader("Content-Type", "application/json;charset=UTF-8"); + var clickData = JSON.stringify({ + "Action": "click" + }); + clickXhr.onreadystatechange = function () { + if (clickXhr.readyState === 4 && clickXhr.status === 200) { + console.log("done."); + return true; // Clicked successfully + } else if (clickXhr.readyState === 4) { + console.error("Failed: " + clickXhr.responseText); + return false; // Failed to click + } + }; + clickXhr.send(clickData); + } else if (xhr.readyState === 4) { + console.error("Failed: " + xhr.responseText); + return false; // Failed to move mouse + } + }; + xhr.send(data); + })(); + `, loc.X, loc.Y) + + // Execute the JavaScript to move the mouse and click + success, err := (*wd).ExecuteScript(jsScriptMoveAndClick, nil) + if err == nil && success == true { + cmn.DebugMsg(cmn.DbgLvlDebug3, "Mouse move 
and click action executed successfully using Rbee") + + attribute := selector.Value + + // JavaScript to send a POST request to Rbee for text input + jsScriptType := fmt.Sprintf(` + (function() { + var xhr = new XMLHttpRequest(); + xhr.open("POST", "http://localhost:3000/v1/rb", true); + xhr.setRequestHeader("Content-Type", "application/json;charset=UTF-8"); + var data = JSON.stringify({ + "Action": "type", + "Value": "%s" + }); + xhr.onreadystatechange = function () { + if (xhr.readyState === 4 && xhr.status === 200) { + console.log("done."); + return true; // Typed successfully + } else if (xhr.readyState === 4) { + console.error("Failed: " + xhr.responseText); + return false; // Failed to type + } + }; + xhr.send(data); + })(); + `, attribute) + + // Execute the JavaScript to type the text + success, err := (*wd).ExecuteScript(jsScriptType, nil) + if err == nil && success == true { + cmn.DebugMsg(cmn.DbgLvlDebug3, "Text input action executed successfully using Rbee") + return nil + } else { + cmn.DebugMsg(cmn.DbgLvlDebug3, "Failed to execute text input using Rbee, falling back to Selenium") + } + } else { + cmn.DebugMsg(cmn.DbgLvlDebug3, "Failed to execute mouse move and click using Rbee, falling back to Selenium") + } + + // Fall back to using Selenium's Click and SendKeys methods + err = wdf.Click() + if err != nil { + return fmt.Errorf("failed to click on element: %v", err) + } + + attribute := selector.Value + err = wdf.SendKeys(attribute) + return err } + return err } diff --git a/pkg/crawler/crawler.go b/pkg/crawler/crawler.go index 4c367632..bb77b0fe 100644 --- a/pkg/crawler/crawler.go +++ b/pkg/crawler/crawler.go @@ -599,6 +599,9 @@ func (ctx *processContext) GetHTTPInfo(url string, htmlContent string) { Timeout: ctx.config.HTTPHeaders.Timeout, SSLDiscovery: ctx.config.HTTPHeaders.SSLDiscovery, } + if len(ctx.config.HTTPHeaders.Proxies) > 0 { + c.Proxies = ctx.config.HTTPHeaders.Proxies + } // Call GetHTTPInfo to retrieve HTTP header information 
cmn.DebugMsg(cmn.DbgLvlInfo, "Gathering HTTP information for %s...", ctx.source.URL) diff --git a/pkg/fingerprints/blake2.go b/pkg/fingerprints/blake2.go new file mode 100644 index 00000000..9ca52115 --- /dev/null +++ b/pkg/fingerprints/blake2.go @@ -0,0 +1,31 @@ +// Copyright 2023 Paolo Fabio Zaino +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package fingerprints implements the fingerprints library for the Crowler +package fingerprints + +import ( + "encoding/hex" + + "golang.org/x/crypto/blake2b" +) + +// BLAKE2 implements the Fingerprint interface for BLAKE2 fingerprints. +type BLAKE2 struct{} + +// Compute computes the BLAKE2 fingerprint of a given data. +func (b BLAKE2) Compute(data string) string { + hash := blake2b.Sum256([]byte(data)) + return hex.EncodeToString(hash[:]) +} diff --git a/pkg/fingerprints/cityhash.go b/pkg/fingerprints/cityhash.go new file mode 100644 index 00000000..c5d91595 --- /dev/null +++ b/pkg/fingerprints/cityhash.go @@ -0,0 +1,143 @@ +// Copyright 2023 Paolo Fabio Zaino +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package fingerprints implements the fingerprints library for the Crowler +package fingerprints + +import ( + "encoding/binary" + "fmt" +) + +// Constants used in CityHash +const ( + k0 = uint64(0xc3a5c85c97cb3127) + k1 = uint64(0xb492b66fbe98f273) + k2 = uint64(0x9ae16a3b2f90404f) + k3 = uint64(0xc949d7c7509e6557) +) + +// CityHash implements the Fingerprint interface for CityHash fingerprints. +type CityHash struct{} + +func (c CityHash) Compute(data string) string { + return fmt.Sprintf("%x", CityHash64([]byte(data))) +} + +// CityHash64 computes the CityHash64 of the given data +func CityHash64(data []byte) uint64 { + if len(data) <= 16 { + return hashLen0to16(data) + } else if len(data) <= 32 { + return hashLen17to32(data) + } else if len(data) <= 64 { + return hashLen33to64(data) + } + + // For strings over 64 bytes we hash the end first, and then as we + // loop we keep 56 bytes of state: v, w, x, y, and z. 
+ x := binary.LittleEndian.Uint64(data[len(data)-40 : len(data)-32]) + y := binary.LittleEndian.Uint64(data[len(data)-16:len(data)-8]) + k1 + z := binary.LittleEndian.Uint64(data[len(data)-56:len(data)-48]) + uint64(len(data)) + v := weakHashLen32WithSeeds(data[len(data)-64:len(data)-32], uint64(len(data)), y) + w := weakHashLen32WithSeeds(data[len(data)-32:], z+k1, x) + x = x*k1 + binary.LittleEndian.Uint64(data) + + offset := 0 + for len(data)-offset > 64 { + x = rotateRight(x+y+v[0]+binary.LittleEndian.Uint64(data[offset+8:offset+16]), 37) * k1 + y = rotateRight(y+v[1]+binary.LittleEndian.Uint64(data[offset+48:offset+56]), 42) * k1 + x, y = x^w[1], y^v[0] + z = rotateRight(z+w[0], 33) * k1 + v = weakHashLen32WithSeeds(data[offset:offset+32], v[1]*k1, x+w[0]) + w = weakHashLen32WithSeeds(data[offset+32:offset+64], z, y+binary.LittleEndian.Uint64(data[offset+48:offset+56])) + offset += 64 + } + return hashLen16(hashLen16(v[0], w[0])+shiftMix(y)*k0+z, hashLen16(v[1], w[1])+x) +} + +func rotateRight(val uint64, shift uint) uint64 { + return (val >> shift) | (val << (64 - shift)) +} + +func hashLen16(u, v uint64) uint64 { + const ( + kMul = uint64(0x9ddfea08eb382d69) + ) + a := (u ^ v) * kMul + a ^= (a >> 47) + b := (v ^ a) * kMul + b ^= (b >> 47) + b *= kMul + return b +} + +func shiftMix(val uint64) uint64 { + return val ^ (val >> 47) +} + +func weakHashLen32WithSeeds(data []byte, seedA, seedB uint64) [2]uint64 { + a := binary.LittleEndian.Uint64(data[0:8]) + b := binary.LittleEndian.Uint64(data[8:16]) + c := binary.LittleEndian.Uint64(data[16:24]) + d := binary.LittleEndian.Uint64(data[24:32]) + + a += seedA + b = rotateRight(b+seedB+a, 21) + c += a + a += d + d = rotateRight(d, 44) + + return [2]uint64{a + b + c, b + d} +} + +func hashLen0to16(data []byte) uint64 { + if len(data) > 8 { + a := binary.LittleEndian.Uint64(data) + b := binary.LittleEndian.Uint64(data[len(data)-8:]) + return hashLen16(a, rotateRight(b+uint64(len(data)), 53)^a) ^ b + } + if len(data) 
>= 4 { + a := uint64(binary.LittleEndian.Uint32(data)) + return hashLen16(uint64(len(data))+(a<<3), uint64(binary.LittleEndian.Uint32(data[len(data)-4:]))) + } + if len(data) > 0 { + a := uint64(data[0]) + b := uint64(data[len(data)>>1]) + c := uint64(data[len(data)-1]) + y := a + (b << 8) + z := uint64(len(data)) + (c << 2) + return shiftMix(y*k2^z*k0) * k2 + } + return k2 +} + +func hashLen17to32(data []byte) uint64 { + a := binary.LittleEndian.Uint64(data) * k1 + b := binary.LittleEndian.Uint64(data[8:]) + c := binary.LittleEndian.Uint64(data[len(data)-8:]) * k2 + d := binary.LittleEndian.Uint64(data[len(data)-16:]) * k0 + return hashLen16(rotateRight(a-b, 43)+rotateRight(c, 30)+d, a+rotateRight(b^k3, 20)-c+uint64(len(data))) +} + +func hashLen33to64(data []byte) uint64 { + z := binary.LittleEndian.Uint64(data[24:]) + a := binary.LittleEndian.Uint64(data[0:8]) * k2 + b := binary.LittleEndian.Uint64(data[8:16]) + c := binary.LittleEndian.Uint64(data[len(data)-8:]) * k2 + d := binary.LittleEndian.Uint64(data[len(data)-16:]) * k2 + a = rotateRight(a+c, 43) + rotateRight(b, 30) + z + b = shiftMix(b + a + d) + return hashLen16(a, b) +} diff --git a/pkg/fingerprints/ctls.go b/pkg/fingerprints/ctls.go new file mode 100644 index 00000000..e4cac718 --- /dev/null +++ b/pkg/fingerprints/ctls.go @@ -0,0 +1,30 @@ +// Copyright 2023 Paolo Fabio Zaino +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Package fingerprints implements the fingerprints library for the Crowler +package fingerprints + +import ( + "crypto/sha256" + "encoding/hex" +) + +// CustomTLS implements the Fingerprint interface for custom TLS fingerprints. +type CustomTLS struct{} + +// Compute computes the custom TLS fingerprint of a given data. +func (c CustomTLS) Compute(data string) string { + hash := sha256.Sum256([]byte(data)) + return hex.EncodeToString(hash[:]) +} diff --git a/pkg/fingerprints/factory.go b/pkg/fingerprints/factory.go new file mode 100644 index 00000000..e0918db5 --- /dev/null +++ b/pkg/fingerprints/factory.go @@ -0,0 +1,71 @@ +// Copyright 2023 Paolo Fabio Zaino +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package fingerprints implements the fingerprints library for the Crowler +package fingerprints + +import "fmt" + +// FingerprintType represents the type of fingerprint algorithm. +type FingerprintType int + +const ( + TypeJA3 FingerprintType = iota + TypeJA3S + TypeHASSH + TypeHASSHServer + TypeTLSH + TypeSimHash + TypeMinHash + TypeBLAKE2 + TypeSHA256 + TypeCityHash + TypeMurmurHash + TypeCustomTLS + TypeJARM +) + +// FingerprintFactory creates an instance of a Fingerprint implementation. 
+func FingerprintFactory(fType FingerprintType) (Fingerprint, error) { + switch fType { + case TypeJA3: + return &JA3{}, nil + case TypeJA3S: + return &JA3S{}, nil + case TypeHASSH: + return &HASSH{}, nil + case TypeHASSHServer: + return &HASSHServer{}, nil + case TypeTLSH: + return &TLSH{}, nil + case TypeSimHash: + return &SimHash{}, nil + case TypeMinHash: + return &MinHash{}, nil + case TypeBLAKE2: + return &BLAKE2{}, nil + case TypeSHA256: + return &SHA256{}, nil + case TypeCityHash: + return &CityHash{}, nil + case TypeMurmurHash: + return &MurmurHash{}, nil + case TypeCustomTLS: + return &CustomTLS{}, nil + case TypeJARM: + return &JARM{}, nil + default: + return nil, fmt.Errorf("unknown fingerprint type") + } +} diff --git a/pkg/fingerprints/hassh.go b/pkg/fingerprints/hassh.go new file mode 100644 index 00000000..d589631f --- /dev/null +++ b/pkg/fingerprints/hassh.go @@ -0,0 +1,29 @@ +// Copyright 2023 Paolo Fabio Zaino +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package fingerprints implements the fingerprints library for the Crowler +package fingerprints + +import ( + "crypto/md5" + "encoding/hex" +) + +// HASSH implements the Fingerprint interface for HASSH fingerprints. 
+type HASSH struct{}
+
+// Compute returns the lowercase hex encoding of the MD5 digest of data.
+func (h HASSH) Compute(data string) string {
+	hasher := md5.New()
+	hasher.Write([]byte(data))
+	return hex.EncodeToString(hasher.Sum(nil))
+}
diff --git a/pkg/fingerprints/hassh_server.go b/pkg/fingerprints/hassh_server.go
new file mode 100644
index 00000000..ef09a222
--- /dev/null
+++ b/pkg/fingerprints/hassh_server.go
@@ -0,0 +1,31 @@
+// Copyright 2023 Paolo Fabio Zaino
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fingerprints implements the fingerprints library for the Crowler
+package fingerprints
+
+import (
+	"crypto/md5"
+	"encoding/hex"
+)
+
+// HASSHServer implements the Fingerprint interface for HASSHServer fingerprints.
+type HASSHServer struct{}
+
+// Compute returns the lowercase hex encoding of the MD5 digest of data.
+func (h HASSHServer) Compute(data string) string {
+	hasher := md5.New()
+	hasher.Write([]byte(data))
+	return hex.EncodeToString(hasher.Sum(nil))
+}
diff --git a/pkg/fingerprints/ja3.go b/pkg/fingerprints/ja3.go
new file mode 100644
index 00000000..e5c24a49
--- /dev/null
+++ b/pkg/fingerprints/ja3.go
@@ -0,0 +1,39 @@
+// Copyright 2023 Paolo Fabio Zaino
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fingerprints implements the fingerprints library for the Crowler
+package fingerprints
+
+import (
+	"crypto/md5"
+	"encoding/hex"
+)
+
+// JA3 implements the Fingerprint interface for JA3 fingerprints.
+type JA3 struct{}
+
+// Compute computes the JA3 fingerprint of a given data.
+func (j JA3) Compute(data string) string {
+	hash := md5.Sum([]byte(data))
+	return hex.EncodeToString(hash[:])
+}
+
+// JA3S implements the Fingerprint interface for JA3S fingerprints.
+type JA3S struct{}
+
+// Compute computes the JA3S fingerprint of a given data.
+func (j JA3S) Compute(data string) string {
+	hash := md5.Sum([]byte(data))
+	return hex.EncodeToString(hash[:])
+}
diff --git a/pkg/fingerprints/jarm.go b/pkg/fingerprints/jarm.go
new file mode 100644
index 00000000..a3e40564
--- /dev/null
+++ b/pkg/fingerprints/jarm.go
@@ -0,0 +1,127 @@
+// Copyright 2023 Paolo Fabio Zaino
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fingerprints implements the fingerprints library for the Crowler
+package fingerprints
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"fmt"
+	"strings"
+)
+
+// JARM implements the Fingerprint interface for JARM fingerprints.
+type JARM struct{}
+
+// Compute computes the JARM fingerprint of a given data.
+func (j JARM) Compute(data string) string {
+	// Assuming 'data' is a string containing multiple handshake details separated by commas.
+	return jarmHash(data)
+}
+
+// jarmHash computes the JARM fingerprint of a given JARM string.
+//
+// jarmRaw holds comma-separated handshake records, each made of four
+// '|'-separated fields: cipher, TLS version, ALPN and extensions. The result
+// is three characters per record (two for the cipher, one for the version)
+// followed by the first 32 hex digits of the SHA-256 of every ALPN and
+// extensions field concatenated. Fields missing from a malformed record are
+// treated as empty rather than triggering an index-out-of-range panic.
+func jarmHash(jarmRaw string) string {
+	if jarmRaw == "|||,|||,|||,|||,|||,|||,|||,|||,|||,|||" {
+		return strings.Repeat("0", 62)
+	}
+
+	var fuzzyHash strings.Builder
+	var alpnsAndExt strings.Builder
+
+	for _, handshake := range strings.Split(jarmRaw, ",") {
+		// Pad to four fields so a truncated record cannot index out of range.
+		var fields [4]string
+		copy(fields[:], strings.Split(handshake, "|"))
+		fuzzyHash.WriteString(cipherBytes(fields[0]))
+		fuzzyHash.WriteString(versionByte(fields[1]))
+		alpnsAndExt.WriteString(fields[2])
+		alpnsAndExt.WriteString(fields[3])
+	}
+
+	digest := sha256Sum(alpnsAndExt.String())
+	fuzzyHash.WriteString(digest[:32])
+	return fuzzyHash.String()
+}
+
+// cipherBytes returns the hex value of the cipher suite.
+//
+// The value is the 1-based position of cipher in the table below, formatted
+// as two hex digits: "00" for an empty cipher and "46" (one past the end of
+// the table) for a cipher that is not listed.
+func cipherBytes(cipher string) string {
+	if cipher == "" {
+		return "00"
+	}
+
+	cipherList := [][]byte{
+		{0x00, 0x04}, {0x00, 0x05}, {0x00, 0x07}, {0x00, 0x0a}, {0x00, 0x16},
+		{0x00, 0x2f}, {0x00, 0x33}, {0x00, 0x35}, {0x00, 0x39}, {0x00, 0x3c},
+		{0x00, 0x3d}, {0x00, 0x41}, {0x00, 0x45}, {0x00, 0x67}, {0x00, 0x6b},
+		{0x00, 0x84}, {0x00, 0x88}, {0x00, 0x9a}, {0x00, 0x9c}, {0x00, 0x9d},
+		{0x00, 0x9e}, {0x00, 0x9f}, {0x00, 0xba}, {0x00, 0xbe}, {0x00, 0xc0},
+		{0x00, 0xc4}, {0xc0, 0x07}, {0xc0, 0x08}, {0xc0, 0x09}, {0xc0, 0x0a},
+		{0xc0, 0x11}, {0xc0, 0x12}, {0xc0, 0x13}, {0xc0, 0x14}, {0xc0, 0x23},
+		{0xc0, 0x24}, {0xc0, 0x27}, {0xc0, 0x28}, {0xc0, 0x2b}, {0xc0, 0x2c},
+		{0xc0, 0x2f}, {0xc0, 0x30}, {0xc0, 0x60}, {0xc0, 0x61}, {0xc0, 0x72},
+		{0xc0, 0x73}, {0xc0, 0x76}, {0xc0, 0x77}, {0xc0, 0x9c}, {0xc0, 0x9d},
+		{0xc0, 0x9e}, {0xc0, 0x9f}, {0xc0, 0xa0}, {0xc0, 0xa1}, {0xc0, 0xa2},
+		{0xc0, 0xa3}, {0xc0, 0xac}, {0xc0, 0xad}, {0xc0, 0xae}, {0xc0, 0xaf},
+		{0xcc, 0x13}, {0xcc, 0x14}, {0xcc, 0xa8}, {0xcc, 0xa9}, {0x13, 0x01},
+		{0x13, 0x02}, {0x13, 0x03}, {0x13, 0x04}, {0x13, 0x05},
+	}
+
+	// Position of the first match, or len(cipherList)+1 when nothing matches.
+	count := 1
+	for _, candidate := range cipherList {
+		if cipher == hex.EncodeToString(candidate) {
+			break
+		}
+		count++
+	}
+
+	return fmt.Sprintf("%02x", count)
+}
+
+// versionByte returns the hex value of the TLS version.
+//
+// The fourth character of version selects a letter from "abcdef". An empty,
+// too short or out-of-table version yields "0" instead of panicking.
+func versionByte(version string) string {
+	const options = "abcdef"
+	if len(version) < 4 {
+		return "0"
+	}
+
+	// Byte subtraction wraps for characters below '0', which also lands
+	// outside the table and is rejected by the bounds check.
+	idx := int(version[3] - '0')
+	if idx >= len(options) {
+		return "0"
+	}
+	return string(options[idx])
+}
+
+// sha256Sum returns the SHA256 hash of the given data.
+func sha256Sum(data string) string {
+	hash := sha256.Sum256([]byte(data))
+	return hex.EncodeToString(hash[:])
+}
diff --git a/pkg/fingerprints/minhash.go b/pkg/fingerprints/minhash.go
new file mode 100644
index 00000000..599b1de8
--- /dev/null
+++ b/pkg/fingerprints/minhash.go
@@ -0,0 +1,70 @@
+// Copyright 2023 Paolo Fabio Zaino
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fingerprints implements the fingerprints library for the Crowler
+package fingerprints
+
+import (
+	"fmt"
+	"hash/fnv"
+	"math"
+)
+
+// MinHash implements the Fingerprint interface for MinHash fingerprints.
+type MinHash struct {
+	numHash int
+	hashes  []uint64
+}
+
+// NewMinHash creates a new MinHash fingerprint with the given number of hashes.
+func NewMinHash(numHash int) *MinHash {
+	hashes := make([]uint64, numHash)
+	for i := range hashes {
+		hashes[i] = math.MaxUint64
+	}
+	return &MinHash{
+		numHash: numHash,
+		hashes:  hashes,
+	}
+}
+
+// hashFunction computes the hash of the given data with the given seed.
+func hashFunction(data []byte, seed uint64) uint64 {
+	h := fnv.New64a()
+	h.Write(data)
+	// Mix in the seed least-significant byte first, always at least one
+	// byte. Seeds below 256 therefore hash exactly as a single byte, while
+	// larger seeds contribute their higher bytes too instead of being
+	// truncated and colliding with seed%256.
+	suffix := []byte{byte(seed)}
+	for rest := seed >> 8; rest != 0; rest >>= 8 {
+		suffix = append(suffix, byte(rest))
+	}
+	h.Write(suffix)
+	return h.Sum64()
+}
+
+// Push pushes the given data into the MinHash fingerprint.
+//
+// For every slot i the data is hashed with seed i, and the slot keeps the
+// smallest value it has seen so far.
+func (mh *MinHash) Push(data []byte) {
+	for i := 0; i < mh.numHash; i++ {
+		hashValue := hashFunction(data, uint64(i))
+		if hashValue < mh.hashes[i] {
+			mh.hashes[i] = hashValue
+		}
+	}
+}
+
+// Signature returns the MinHash fingerprint signature.
+//
+// The returned slice is the internal one, not a copy: later calls to Push
+// are visible through it.
+func (mh *MinHash) Signature() []uint64 {
+	return mh.hashes
+}
+
+// Compute computes the MinHash fingerprint of a given data.
+//
+// It uses a fresh 200-slot MinHash, so the receiver's own state is never read
+// or modified. The whole input is pushed as a single element and the
+// signature is formatted with fmt's %x verb for a []uint64.
+func (m MinHash) Compute(data string) string {
+	mh := NewMinHash(200)
+	mh.Push([]byte(data))
+	return fmt.Sprintf("%x", mh.Signature())
+}
diff --git a/pkg/fingerprints/murmurhash.go b/pkg/fingerprints/murmurhash.go
new file mode 100644
index 00000000..52417e3e
--- /dev/null
+++ b/pkg/fingerprints/murmurhash.go
@@ -0,0 +1,30 @@
+// Copyright 2023 Paolo Fabio Zaino
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fingerprints implements the fingerprints library for the Crowler
+package fingerprints
+
+import (
+	"fmt"
+
+	"github.com/spaolacci/murmur3"
+)
+
+// MurmurHash implements the Fingerprint interface for MurmurHash fingerprints.
+type MurmurHash struct{}
+
+// Compute computes the MurmurHash fingerprint of a given data.
+func (m MurmurHash) Compute(data string) string {
+	sum := murmur3.Sum32([]byte(data))
+	return fmt.Sprintf("%x", sum)
+}
diff --git a/pkg/fingerprints/sha256.go b/pkg/fingerprints/sha256.go
new file mode 100644
index 00000000..35ed773c
--- /dev/null
+++ b/pkg/fingerprints/sha256.go
@@ -0,0 +1,31 @@
+// Copyright 2023 Paolo Fabio Zaino
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fingerprints implements the fingerprints library for the Crowler
+package fingerprints
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+)
+
+// SHA256 is the Fingerprint implementation backed by plain SHA-256.
+type SHA256 struct{}
+
+// Compute returns the lowercase hex encoding of the SHA-256 digest of data.
+func (s SHA256) Compute(data string) string {
+	hasher := sha256.New()
+	hasher.Write([]byte(data))
+	return hex.EncodeToString(hasher.Sum(nil))
+}
diff --git a/pkg/fingerprints/simhash.go b/pkg/fingerprints/simhash.go
new file mode 100644
index 00000000..a60ee090
--- /dev/null
+++ b/pkg/fingerprints/simhash.go
@@ -0,0 +1,52 @@
+// Copyright 2023 Paolo Fabio Zaino
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fingerprints implements the fingerprints library for the Crowler
+package fingerprints
+
+import (
+	"crypto/md5"
+	"encoding/binary"
+	"fmt"
+	"strings"
+)
+
+// SimHash implements the Fingerprint interface for SimHash fingerprints.
+type SimHash struct{}
+
+// Compute computes the SimHash fingerprint of a given data.
+//
+// The input is split on whitespace. Every word votes on each of the 64 bit
+// positions using the first 8 bytes of its MD5 digest (read big-endian), and
+// a bit is set in the result when more words had it set than clear. The
+// fingerprint is returned as lowercase hex without leading zeros, which is
+// "0" when the input contains no words.
+func (s SimHash) Compute(data string) string {
+	var votes [64]int
+
+	for _, word := range strings.Fields(data) {
+		sum := md5.Sum([]byte(word))
+		// Decode the word hash once per word rather than once per bit.
+		wordHash := binary.BigEndian.Uint64(sum[:])
+		for i := 0; i < 64; i++ {
+			if (wordHash>>i)&1 == 1 {
+				votes[i]++
+			} else {
+				votes[i]--
+			}
+		}
+	}
+
+	var fingerprint uint64
+	for i, v := range votes {
+		if v > 0 {
+			fingerprint |= 1 << i
+		}
+	}
+
+	return fmt.Sprintf("%x", fingerprint)
+}
diff --git a/pkg/fingerprints/tlsh.go b/pkg/fingerprints/tlsh.go
new file mode 100644
index 00000000..d310494e
--- /dev/null
+++ b/pkg/fingerprints/tlsh.go
@@ -0,0 +1,59 @@
+// Copyright 2023 Paolo Fabio Zaino
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fingerprints implements the fingerprints library for the Crowler
+package fingerprints
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+)
+
+// TLSH implements the Fingerprint interface for TLSH fingerprints.
+//
+// NOTE(review): the value produced here is a SHA-256 over a byte histogram;
+// confirm that consumers do not expect digests compatible with other TLSH
+// implementations.
+type TLSH struct {
+	buckets  [256]int // occurrences of each byte value seen by Update
+	total    int      // number of bytes seen by Update
+	checksum [1]byte  // XOR of every byte seen by Update
+}
+
+// NewTLSH creates a new TLSH fingerprint.
+func NewTLSH() *TLSH {
+	return &TLSH{}
+}
+
+// Update updates the TLSH fingerprint with new data.
+func (t *TLSH) Update(data []byte) {
+	for _, b := range data {
+		t.checksum[0] ^= b
+		t.buckets[b]++
+		t.total++
+	}
+}
+
+// Finalize finalizes the TLSH fingerprint.
+//
+// It returns the hex-encoded SHA-256 of the byte histogram; total and
+// checksum are not part of the digest. The low byte of every bucket is hashed
+// first, which is the complete input while no byte value has occurred more
+// than 255 times. Once any bucket exceeds that, the remaining high bits of
+// every bucket are hashed as well, so that counts differing by a multiple of
+// 256 no longer produce the same fingerprint.
+func (t *TLSH) Finalize() string {
+	digest := sha256.New()
+
+	var low [256]byte
+	overflow := false
+	for i, count := range t.buckets {
+		low[i] = byte(count)
+		if count > 0xff {
+			overflow = true
+		}
+	}
+	digest.Write(low[:])
+
+	if overflow {
+		// Fixed-width encoding keeps the byte stream unambiguous.
+		var high [8]byte
+		for _, count := range t.buckets {
+			rest := uint64(count) >> 8
+			for i := range high {
+				high[i] = byte(rest >> (8 * uint(i)))
+			}
+			digest.Write(high[:])
+		}
+	}
+
+	return hex.EncodeToString(digest.Sum(nil))
+}
+
+// Compute computes the TLSH fingerprint of a given data.
+func (t TLSH) Compute(data string) string {
+	tlsh := NewTLSH()
+	tlsh.Update([]byte(data))
+	return tlsh.Finalize()
+}
diff --git a/pkg/fingerprints/types.go b/pkg/fingerprints/types.go
new file mode 100644
index 00000000..f47b6be8
--- /dev/null
+++ b/pkg/fingerprints/types.go
@@ -0,0 +1,21 @@
+// Copyright 2023 Paolo Fabio Zaino
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fingerprints implements the fingerprints library for the Crowler
+package fingerprints
+
+// Fingerprint is the interface that wraps the basic Compute method.
+type Fingerprint interface {
+	Compute(data string) string
+}
diff --git a/pkg/httpinfo/httpinfo.go b/pkg/httpinfo/httpinfo.go
index 8f61d869..29013cce 100644
--- a/pkg/httpinfo/httpinfo.go
+++ b/pkg/httpinfo/httpinfo.go
@@ -82,7 +82,7 @@ func ExtractHTTPInfo(config Config, re *ruleset.RuleEngine, htmlContent string)
 
 	// Retrieve SSL Info (if it's HTTPS)
 	cmn.DebugMsg(cmn.DbgLvlDebug1, "Collecting SSL/TLS information for URL: %s", config.URL)
-	sslInfo, err := getSSLInfo(config.URL)
+	sslInfo, err := getSSLInfo(&config)
 	if err != nil {
 		cmn.DebugMsg(cmn.DbgLvlError, "retrieving SSL information: %v", err)
 	}
@@ -142,18 +142,52 @@ func validateIPAddress(url string) error {
 	return nil
 }
 
-func getSSLInfo(url string) (*SSLInfo, error) {
+// getSSLInfo collects and validates the SSL/TLS data of the endpoint named by
+// config.URL. Failures are logged at debug level only: the (possibly partly
+// filled) SSLInfo is always returned together with a nil error.
+func getSSLInfo(config *Config) (*SSLInfo, error) {
+	// Split config.URL into the bare host to dial and the port to probe.
+	host := strings.TrimSpace(config.URL)
+	port := ""
+	// The scheme, matched case-insensitively, selects the default port.
+	switch lower := strings.ToLower(host); {
+	case strings.HasPrefix(lower, "http"):
+		port = "443"
+	case strings.HasPrefix(lower, "ftp"):
+		port = "21"
+	case strings.HasPrefix(lower, "ws:"), strings.HasPrefix(lower, "wss:"):
+		port = "80"
+	}
+	// Strip a leading scheme whatever its letter case.
+	if i := strings.Index(host, "://"); i >= 0 && !strings.ContainsAny(host[:i], "/?#") {
+		host = host[i+3:]
+	}
+	// Drop path, query and fragment: they are not part of the endpoint.
+	if i := strings.IndexAny(host, "/?#"); i >= 0 {
+		host = host[:i]
+	}
+	// An explicit port overrides the default. A colon inside a bracketed
+	// IPv6 literal belongs to the address and is not a port separator.
+	if i := strings.LastIndex(host, ":"); i >= 0 && !strings.Contains(host[i:], "]") {
+		host, port = host[:i], host[i+1:]
+	}
+
+	cmn.DebugMsg(cmn.DbgLvlDebug1, "URL: %s, Port: %s", host, port)
+
+	// Get the SSL information
 	sslInfo := NewSSLInfo()
-	if strings.HasPrefix(url, "https") {
-		err := sslInfo.GetSSLInfo(url, "443")
-		if err != nil {
-			cmn.DebugMsg(cmn.DbgLvlDebug1, "Error retrieving SSL information: %v", err)
-		}
-		err = sslInfo.ValidateCertificate()
-		if err != nil {
-			cmn.DebugMsg(cmn.DbgLvlDebug1, "Error validating SSL certificate: %v", err)
-		}
+	//err := sslInfo.GetSSLInfo(host, port)
+	err := sslInfo.CollectSSLData(host, port, config)
+	if err != nil {
+		cmn.DebugMsg(cmn.DbgLvlDebug1, "Error retrieving SSL information: %v", err)
+	}
+
+	// Validate the SSL certificate
+	err = sslInfo.ValidateCertificate()
+	if err != nil {
+		cmn.DebugMsg(cmn.DbgLvlDebug1, "Error validating SSL certificate: %v", err)
 	}
+
 	return sslInfo, nil
 }
 
@@ -166,6 +200,18 @@ func createHTTPClient(config Config) *http.Client {
 	}
 	sn := urlToDomain(config.URL)
 	transport.TLSClientConfig.ServerName = sn
+
+	if len(config.Proxies) > 0 {
+		proxyURL, err := url.Parse(config.Proxies[0].Address)
+		if err == nil {
+			transport.Proxy = http.ProxyURL(proxyURL)
+		}
+		if config.Proxies[0].Username != "" {
+			transport.ProxyConnectHeader = http.Header{}
+			transport.ProxyConnectHeader.Set("Proxy-Authorization", basicAuth(config.Proxies[0].Username, config.Proxies[0].Password))
+		}
+	}
+
 	httpClient := &http.Client{
 		Transport: transport,
 		CheckRedirect: func(req *http.Request, via []*http.Request) error {
@@ -175,6 +221,11 @@ func createHTTPClient(config Config) *http.Client {
 	return httpClient
 }
 
+func basicAuth(username, password string) string {
+	auth := username + ":" + password
+	return "Basic " + cmn.Base64Encode(auth)
+}
+
 func sendHTTPRequest(httpClient *http.Client, config Config) (*http.Response, error) {
 	req, err := http.NewRequest("GET", config.URL, nil)
 	if err != nil {
diff --git a/pkg/httpinfo/jarm_collector.go b/pkg/httpinfo/jarm_collector.go
new file mode 100644
index 00000000..3619995f
--- /dev/null
+++ b/pkg/httpinfo/jarm_collector.go
@@ -0,0 +1,724 @@
+// Copyright 2023 Paolo Fabio Zaino
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package httpinfo provides functionality to extract HTTP header information
+package httpinfo
+
+import (
+	"bytes"
+	"crypto/rand"
+	"encoding/binary"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"math/big"
+	"net"
+	"net/url"
+	"strings"
+	"time"
+
+	cmn "github.com/pzaino/thecrowler/pkg/common"
+	cfg "github.com/pzaino/thecrowler/pkg/config"
+	"golang.org/x/net/proxy"
+)
+
+// Labels used in the JARM probe descriptions built by Collect. The version
+// labels select the record and ClientHello version bytes in buildPacket; the
+// *Support labels are compared against the "supported versions" column of a
+// probe description in getExtensions.
+const (
+	sslV3         = "SSLv3"
+	tlsV10        = "TLS_1"
+	tlsV11        = "TLS_1.1"
+	tlsV12        = "TLS_1.2"
+	tlsV12Support = "1.2_SUPPORT"
+	tlsV13        = "TLS_1.3"
+	tlsV13Support = "1.3_SUPPORT"
+)
+
+// JARMCollector gathers the raw JARM string of a TLS endpoint through its
+// Collect method.
+type JARMCollector struct {
+	// Proxy is an optional SOCKS proxy configuration.
+	// NOTE(review): presumably consumed by sendPacket — confirm.
+	Proxy *cfg.SOCKSProxy
+}
+
+// ProxyConfig holds the address and credentials of a proxy.
+// NOTE(review): no use of this type is visible in the reviewed part of the
+// file — confirm it is still needed.
+type ProxyConfig struct {
+	Address  string
+	Username string
+	Password string
+}
+
+// formatForPython encodes a byte slice into a Python byte string format.
+func formatForPython(data []byte) string {
+	var buffer bytes.Buffer
+	buffer.WriteString("b'")
+	for _, b := range data {
+		switch {
+		case b == '\\' || b == '\'':
+			// Escape the quote and the escape character themselves so the
+			// result stays a valid, unambiguous Python bytes literal.
+			buffer.WriteByte('\\')
+			buffer.WriteByte(b)
+		case b >= 0x20 && b <= 0x7e:
+			// Printable ASCII is emitted as is.
+			buffer.WriteByte(b)
+		default:
+			fmt.Fprintf(&buffer, "\\x%02x", b)
+		}
+	}
+	buffer.WriteString("'")
+	return buffer.String()
+}
+
+// PrintClientHelloDetails prints a field-by-field dump of a TLS ClientHello
+// record to standard output.
+//
+// The packet is expected to start with the 5-byte record header followed by
+// the handshake header, client version, random, session ID, cipher suites,
+// compression methods and extensions. Parsing stops with a "Packet too short"
+// message as soon as the packet is shorter than the field being read.
+func PrintClientHelloDetails(packet []byte) {
+	defer func() {
+		if r := recover(); r != nil {
+			fmt.Println("Recovered in printClientHelloDetails:", r)
+		}
+	}()
+
+	fmt.Println("------------------------------------------------------------")
+	fmt.Printf("ClientHello Packet: %x\n", packet)
+
+	// Fixed-size prefix: record header (5 bytes), handshake header (4),
+	// client version (2), random (32) and the session ID length byte (1).
+	const fixedPrefixLen = 44
+	if len(packet) < fixedPrefixLen {
+		fmt.Println("Packet too short")
+		return
+	}
+
+	contentType := packet[0]
+	version := packet[1:3]
+	length := packet[3:5]
+	handshakeType := packet[5]
+	handshakeLength := packet[6:9]
+	clientVersion := packet[9:11]
+	random := packet[11:43]
+	sessionIDLength := packet[43]
+
+	// Offsets are tracked as int: arithmetic on the raw length byte would
+	// wrap around for session IDs longer than 211 bytes.
+	offset := fixedPrefixLen
+	sessionIDLen := int(sessionIDLength)
+	if len(packet) < offset+sessionIDLen {
+		fmt.Println("Packet too short for session ID")
+		return
+	}
+	sessionID := packet[offset : offset+sessionIDLen]
+	offset += sessionIDLen
+
+	if len(packet) < offset+2 {
+		fmt.Println("Packet too short for cipher suites length")
+		return
+	}
+	cipherSuitesLength := packet[offset : offset+2]
+	cipherSuitesLen := int(cipherSuitesLength[0])<<8 + int(cipherSuitesLength[1])
+	offset += 2
+
+	if len(packet) < offset+cipherSuitesLen {
+		fmt.Println("Packet too short for cipher suites")
+		return
+	}
+	cipherSuites := packet[offset : offset+cipherSuitesLen]
+	offset += cipherSuitesLen
+
+	if len(packet) < offset+1 {
+		fmt.Println("Packet too short for compression methods length")
+		return
+	}
+	compressionMethodsLength := packet[offset]
+	compressionMethodsLen := int(compressionMethodsLength)
+	offset++
+
+	if len(packet) < offset+compressionMethodsLen {
+		fmt.Println("Packet too short for compression methods")
+		return
+	}
+	compressionMethods := packet[offset : offset+compressionMethodsLen]
+	offset += compressionMethodsLen
+
+	if len(packet) < offset+2 {
+		fmt.Println("Packet too short for extensions length")
+		return
+	}
+	extensionsLength := packet[offset : offset+2]
+	extensionsLen := int(extensionsLength[0])<<8 + int(extensionsLength[1])
+	offset += 2
+
+	if len(packet) < offset+extensionsLen {
+		fmt.Println("Packet too short for extensions")
+		return
+	}
+	extensions := packet[offset : offset+extensionsLen]
+
+	fmt.Printf("Content Type: %x\n", contentType)
+	fmt.Printf("Version: %x\n", version)
+	fmt.Printf("Length: %x\n", length)
+	fmt.Printf("Handshake Type: %x\n", handshakeType)
+	fmt.Printf("Handshake Length: %x\n", handshakeLength)
+	fmt.Printf("Client Version: %x\n", clientVersion)
+	fmt.Printf("Client Version PyString: %s\n", formatForPython(clientVersion))
+	fmt.Printf("Random: %x\n", random)
+	fmt.Printf("Session ID Length: %x\n", sessionIDLength)
+	fmt.Printf("Session ID: %x\n", sessionID)
+	fmt.Printf("Cipher Suites Length: %x\n", cipherSuitesLength)
+	fmt.Printf("Cipher Suites: %x\n", cipherSuites)
+	fmt.Printf("Cipher Suites PyString: %s\n", formatForPython(cipherSuites))
+	fmt.Printf("Compression Methods Length: %x\n", compressionMethodsLength)
+	fmt.Printf("Compression Methods: %x\n", compressionMethods)
+	fmt.Printf("Extensions Length: %x\n", extensionsLength)
+	fmt.Printf("Extensions: %x\n", extensions)
+	fmt.Printf("Extensions PyString: %s\n", formatForPython(extensions))
+	fmt.Println("------------------------------------------------------------")
+}
+
+// Collect collects JARM
fingerprint for a given host and port
+//
+// One ClientHello is built per row of jarmDetails and sent with sendPacket;
+// the value readPacket extracts from each reply is appended to the result,
+// comma-separated. The first send error aborts the collection.
+//
+// Columns of a jarmDetails row, as consumed by the builders below:
+// [0] host, [1] port, [2] TLS version label, [3] cipher list ("ALL" or
+// "NO1.3"), [4] cipher order, [5] GREASE switch, [6] ALPN list, [7]
+// supported-versions label, [8] ALPN order.
+func (jc JARMCollector) Collect(host string, port string) (string, error) {
+	jarmDetails := [10][]string{
+		{host, port, tlsV12, "ALL", "FORWARD", "NO_GREASE", "APLN", tlsV12Support, "REVERSE"},
+		{host, port, tlsV12, "ALL", "REVERSE", "NO_GREASE", "APLN", tlsV12Support, "FORWARD"},
+		{host, port, tlsV12, "ALL", "TOP_HALF", "NO_GREASE", "APLN", "NO_SUPPORT", "FORWARD"},
+		{host, port, tlsV12, "ALL", "BOTTOM_HALF", "NO_GREASE", "RARE_APLN", "NO_SUPPORT", "FORWARD"},
+		{host, port, tlsV12, "ALL", "MIDDLE_OUT", "GREASE", "RARE_APLN", "NO_SUPPORT", "REVERSE"},
+		{host, port, tlsV11, "ALL", "FORWARD", "NO_GREASE", "APLN", "NO_SUPPORT", "FORWARD"},
+		{host, port, tlsV13, "ALL", "FORWARD", "NO_GREASE", "APLN", tlsV13Support, "REVERSE"},
+		{host, port, tlsV13, "ALL", "REVERSE", "NO_GREASE", "APLN", tlsV13Support, "FORWARD"},
+		{host, port, tlsV13, "NO1.3", "FORWARD", "NO_GREASE", "APLN", tlsV13Support, "FORWARD"},
+		{host, port, tlsV13, "ALL", "MIDDLE_OUT", "GREASE", "APLN", tlsV13Support, "REVERSE"},
+	}
+
+	var jarmBuilder strings.Builder
+	for _, detail := range jarmDetails {
+		packet := buildPacket(detail)
+
+		// debug:
+		cmn.DebugMsg(cmn.DbgLvlDebug3, "JARM built packet: %s\n", formatForPython(packet))
+		//PrintClientHelloDetails(packet)
+
+		serverHello, err := jc.sendPacket(packet, host, port)
+		if err != nil {
+			return "", err
+		}
+		ans := readPacket(serverHello, detail)
+		// debug:
+		cmn.DebugMsg(cmn.DbgLvlDebug3, "JARM collected response: %s\n", formatForPython(serverHello))
+		jarmBuilder.WriteString(ans + ",")
+	}
+	// Drop the separator left behind by the last iteration.
+	jarm := strings.TrimRight(jarmBuilder.String(), ",")
+	return jarm, nil
+}
+
+// buildPacket constructs a ClientHello packet based on the provided JARM details
+//
+// The result starts with the record content type 0x16 and the record version
+// chosen from jarmDetails[2], followed by the outer length and the handshake
+// message (type 0x01, length, ClientHello body).
+func buildPacket(jarmDetails []string) []byte {
+	payload := []byte{0x16}
+	var clientHello []byte
+
+	// Version Check
+	// payload receives the record-layer version, clientHello the version
+	// field of the ClientHello body; an unknown label appends neither.
+	switch jarmDetails[2] {
+	case tlsV13:
+		payload = append(payload, []byte{0x03, 0x01}...)
+		clientHello = append(clientHello, []byte{0x03, 0x03}...)
+	case sslV3:
+		payload = append(payload, []byte{0x03, 0x00}...)
+		clientHello = append(clientHello, []byte{0x03, 0x00}...)
+	case tlsV10:
+		payload = append(payload, []byte{0x03, 0x01}...)
+		clientHello = append(clientHello, []byte{0x03, 0x01}...)
+	case tlsV11:
+		payload = append(payload, []byte{0x03, 0x02}...)
+		clientHello = append(clientHello, []byte{0x03, 0x02}...)
+	case tlsV12:
+		payload = append(payload, []byte{0x03, 0x03}...)
+		clientHello = append(clientHello, []byte{0x03, 0x03}...)
+	}
+
+	// Random values in client hello
+	rndBytes := randomBytes(32)
+	clientHello = append(clientHello, rndBytes...)
+	// debug:
+	cmn.DebugMsg(cmn.DbgLvlDebug3, "Random Bytes: %s\n\n", formatForPython(rndBytes))
+	sessionID := randomBytes(32)
+	clientHello = append(clientHello, byte(len(sessionID)))
+	clientHello = append(clientHello, sessionID...)
+	// debug:
+	cmn.DebugMsg(cmn.DbgLvlDebug3, "Session ID: %s\n\n", formatForPython(sessionID))
+
+	// Get ciphers
+	cipherChoice := getCiphers(jarmDetails)
+	clientSuitesLength := toBytes(len(cipherChoice))
+	clientHello = append(clientHello, clientSuitesLength...)
+	clientHello = append(clientHello, cipherChoice...)
+	// debug:
+	cmn.DebugMsg(cmn.DbgLvlDebug3, "Cipher Suites: %s\n\n", formatForPython(cipherChoice))
+
+	// Cipher methods
+	// NOTE(review): in the layout parsed by PrintClientHelloDetails the byte
+	// following the cipher suites is the compression-methods length.
+	clientHello = append(clientHello, 0x01)
+	// Compression methods
+	clientHello = append(clientHello, 0x00)
+
+	// Add extensions to client hello
+	// NOTE(review): getExtensions already prefixes its result with a two-byte
+	// length. If toBytes emits a length here as well, the extensions length
+	// is written twice — confirm against toBytes.
+	extensions := getExtensions(jarmDetails)
+	clientHello = append(clientHello, toBytes(len(extensions))...)
+	clientHello = append(clientHello, extensions...)
+	// debug:
+	cmn.DebugMsg(cmn.DbgLvlDebug3, "Extensions: %s\n\n", formatForPython(extensions))
+
+	// Finish packet assembly
+	innerLength := append([]byte{0x00}, toBytes(len(clientHello))...)
+	handshakeProtocol := append([]byte{0x01}, innerLength...)
+	handshakeProtocol = append(handshakeProtocol, clientHello...)
+	outerLength := toBytes(len(handshakeProtocol))
+	payload = append(payload, outerLength...)
+	payload = append(payload, handshakeProtocol...)
+
+	// debug:
+	cmn.DebugMsg(cmn.DbgLvlDebug3, "Constructed ClientHello: %s\n", formatForPython(clientHello))
+	return payload
+}
+
+// getCiphers returns the selected ciphers based on the JARM details
+//
+// jarmDetails[3] picks the cipher table ("ALL" or "NO1.3"; any other value
+// leaves the list empty), jarmDetails[4] the ordering applied by cipherMung
+// and jarmDetails[5] whether a GREASE value is put in front. The two-byte
+// cipher IDs are returned concatenated.
+func getCiphers(jarmDetails []string) []byte {
+	var selectedCiphers []byte
+	var cipherList [][]byte
+
+	if jarmDetails[3] == "ALL" {
+		cipherList = [][]byte{
+			{0x00, 0x16}, {0x00, 0x33}, {0x00, 0x67}, {0xc0, 0x9e}, {0xc0, 0xa2},
+			{0x00, 0x9e}, {0x00, 0x39}, {0x00, 0x6b}, {0xc0, 0x9f}, {0xc0, 0xa3},
+			{0x00, 0x9f}, {0x00, 0x45}, {0x00, 0xbe}, {0x00, 0x88}, {0x00, 0xc4},
+			{0x00, 0x9a}, {0xc0, 0x08}, {0xc0, 0x09}, {0xc0, 0x23}, {0xc0, 0xac},
+			{0xc0, 0xae}, {0xc0, 0x2b}, {0xc0, 0x0a}, {0xc0, 0x24}, {0xc0, 0xad},
+			{0xc0, 0xaf}, {0xc0, 0x2c}, {0xc0, 0x72}, {0xc0, 0x73}, {0xcc, 0xa9},
+			{0x13, 0x02}, {0x13, 0x01}, {0xcc, 0x14}, {0xc0, 0x07}, {0xc0, 0x12},
+			{0xc0, 0x13}, {0xc0, 0x27}, {0xc0, 0x2f}, {0xc0, 0x14}, {0xc0, 0x28},
+			{0xc0, 0x30}, {0xc0, 0x60}, {0xc0, 0x61}, {0xc0, 0x76}, {0xc0, 0x77},
+			{0xcc, 0xa8}, {0x13, 0x05}, {0x13, 0x04}, {0x13, 0x03}, {0xcc, 0x13},
+			{0xc0, 0x11}, {0x00, 0x0a}, {0x00, 0x2f}, {0x00, 0x3c}, {0xc0, 0x9c},
+			{0xc0, 0xa0}, {0x00, 0x9c}, {0x00, 0x35}, {0x00, 0x3d}, {0xc0, 0x9d},
+			{0xc0, 0xa1}, {0x00, 0x9d}, {0x00, 0x41}, {0x00, 0xba}, {0x00, 0x84},
+			{0x00, 0xc0}, {0x00, 0x07}, {0x00, 0x04}, {0x00, 0x05},
+		}
+	} else if jarmDetails[3] == "NO1.3" {
+		// Same table as above without the 0x13xx entries.
+		cipherList = [][]byte{
+			{0x00, 0x16}, {0x00, 0x33}, {0x00, 0x67}, {0xc0, 0x9e}, {0xc0, 0xa2},
+			{0x00, 0x9e}, {0x00, 0x39}, {0x00, 0x6b}, {0xc0, 0x9f}, {0xc0, 0xa3},
+			{0x00, 0x9f}, {0x00, 0x45}, {0x00, 0xbe}, {0x00, 0x88}, {0x00, 0xc4},
+			{0x00, 0x9a}, {0xc0, 0x08}, {0xc0, 0x09}, {0xc0, 0x23}, {0xc0, 0xac},
+			{0xc0, 0xae}, {0xc0, 0x2b}, {0xc0, 0x0a}, {0xc0, 0x24}, {0xc0, 0xad},
+			{0xc0, 0xaf}, {0xc0, 0x2c}, {0xc0, 0x72}, {0xc0, 0x73}, {0xcc, 0xa9},
+			{0xcc, 0x14}, {0xc0, 0x07}, {0xc0, 0x12}, {0xc0, 0x13}, {0xc0, 0x27},
+			{0xc0, 0x2f}, {0xc0, 0x14}, {0xc0, 0x28}, {0xc0, 0x30}, {0xc0, 0x60},
+			{0xc0, 0x61}, {0xc0, 0x76}, {0xc0, 0x77}, {0xcc, 0xa8}, {0xcc, 0x13},
+			{0xc0, 0x11}, {0x00, 0x0a}, {0x00, 0x2f}, {0x00, 0x3c}, {0xc0, 0x9c},
+			{0xc0, 0xa0}, {0x00, 0x9c}, {0x00, 0x35}, {0x00, 0x3d}, {0xc0, 0x9d},
+			{0xc0, 0xa1}, {0x00, 0x9d}, {0x00, 0x41}, {0x00, 0xba}, {0x00, 0x84},
+			{0x00, 0xc0}, {0x00, 0x07}, {0x00, 0x04}, {0x00, 0x05},
+		}
+	}
+
+	if jarmDetails[4] != "FORWARD" {
+		cipherList = cipherMung(cipherList, jarmDetails[4])
+	}
+
+	if jarmDetails[5] == "GREASE" {
+		cipherList = append([][]byte{chooseGrease()}, cipherList...)
+	}
+
+	for _, cipher := range cipherList {
+		selectedCiphers = append(selectedCiphers, cipher...)
+	}
+
+	return selectedCiphers
+}
+
+// cipherMung returns a modified list of ciphers based on the request
+//
+// Supported requests are "REVERSE", "BOTTOM_HALF", "TOP_HALF" and
+// "MIDDLE_OUT"; any other request yields a nil slice. The function is also
+// used to reorder ALPN entries. For "BOTTOM_HALF" the result shares its
+// backing array with ciphers.
+func cipherMung(ciphers [][]byte, request string) [][]byte {
+	var output [][]byte
+	cipherLen := len(ciphers)
+
+	switch request {
+	case "REVERSE":
+		// Ciphers backward
+		for i := cipherLen - 1; i >= 0; i-- {
+			output = append(output, ciphers[i])
+		}
+	case "BOTTOM_HALF":
+		// Bottom half of ciphers
+		// With an odd length the middle element is left out.
+		if cipherLen%2 == 1 {
+			output = ciphers[int(cipherLen/2)+1:]
+		} else {
+			output = ciphers[int(cipherLen/2):]
+		}
+	case "TOP_HALF":
+		// Top half of ciphers in reverse order
+		// With an odd length the middle element goes first.
+		if cipherLen%2 == 1 {
+			output = append(output, ciphers[int(cipherLen/2)])
+		}
+		output = append(output, cipherMung(cipherMung(ciphers, "REVERSE"), "BOTTOM_HALF")...)
+	case "MIDDLE_OUT":
+		// Middle-out cipher order
+		// Alternates between the elements after and before the middle.
+		middle := int(cipherLen / 2)
+		if cipherLen%2 == 1 {
+			output = append(output, ciphers[middle])
+			for i := 1; i <= middle; i++ {
+				output = append(output, ciphers[middle+i])
+				output = append(output, ciphers[middle-i])
+			}
+		} else {
+			for i := 1; i <= middle; i++ {
+				output = append(output, ciphers[middle-1+i])
+				output = append(output, ciphers[middle-i])
+			}
+		}
+	}
+
+	return output
+}
+
+// getExtensions returns the selected extensions based on the JARM details
+//
+// The returned bytes are the concatenated extensions prefixed with their
+// total length as a two-byte big-endian value.
+func getExtensions(jarmDetails []string) []byte {
+	var extensionBytes []byte
+	var allExtensions []byte
+	grease := false
+
+	// GREASE
+	if jarmDetails[5] == "GREASE" {
+		allExtensions = append(allExtensions, chooseGrease()...)
+		allExtensions = append(allExtensions, 0x00, 0x00)
+		grease = true
+	}
+
+	// Server name
+	allExtensions = append(allExtensions, extensionServerName(jarmDetails[0])...)
+
+	// Other extensions
+	extendedMasterSecret := []byte{0x00, 0x17, 0x00, 0x00}
+	allExtensions = append(allExtensions, extendedMasterSecret...)
+
+	maxFragmentLength := []byte{0x00, 0x01, 0x00, 0x01, 0x01}
+	allExtensions = append(allExtensions, maxFragmentLength...)
+
+	renegotiationInfo := []byte{0xff, 0x01, 0x00, 0x01, 0x00}
+	allExtensions = append(allExtensions, renegotiationInfo...)
+
+	supportedGroups := []byte{0x00, 0x0a, 0x00, 0x0a, 0x00, 0x08, 0x00, 0x1d, 0x00, 0x17, 0x00, 0x18, 0x00, 0x19}
+	allExtensions = append(allExtensions, supportedGroups...)
+
+	ecPointFormats := []byte{0x00, 0x0b, 0x00, 0x02, 0x01, 0x00}
+	allExtensions = append(allExtensions, ecPointFormats...)
+
+	sessionTicket := []byte{0x00, 0x23, 0x00, 0x00}
+	allExtensions = append(allExtensions, sessionTicket...)
+
+	// Application Layer Protocol Negotiation extension
+	allExtensions = append(allExtensions, appLayerProtoNegotiation(jarmDetails)...)
+
+	signatureAlgorithms := []byte{0x00, 0x0d, 0x00, 0x14, 0x00, 0x12, 0x04, 0x03, 0x08, 0x04, 0x04, 0x01, 0x05, 0x03, 0x08, 0x05, 0x05, 0x01, 0x08, 0x06, 0x06, 0x01, 0x02, 0x01}
+	allExtensions = append(allExtensions, signatureAlgorithms...)
+
+	// Key share extension
+	allExtensions = append(allExtensions, keyShare(grease)...)
+
+	pskKeyExchangeModes := []byte{0x00, 0x2d, 0x00, 0x02, 0x01, 0x01}
+	allExtensions = append(allExtensions, pskKeyExchangeModes...)
+
+	// Supported versions extension
+	if jarmDetails[2] == tlsV13 || jarmDetails[7] == tlsV12Support {
+		allExtensions = append(allExtensions, supportedVersions(jarmDetails, grease)...)
+	}
+
+	// Finish assembling extensions
+	extensionLength := len(allExtensions)
+	extensionBytes = append(extensionBytes, byte(extensionLength>>8), byte(extensionLength&0xff))
+	extensionBytes = append(extensionBytes, allExtensions...)
+
+	return extensionBytes
+}
+
+// extensionServerName returns the Server Name Indication extension
+//
+// Layout: extension type 0x0000, extension length (len(host)+5), list length
+// (len(host)+3), name type 0x00, name length and the host bytes. All lengths
+// are two-byte big-endian values.
+func extensionServerName(host string) []byte {
+	var extSNI []byte
+	extSNI = append(extSNI, 0x00, 0x00)
+	extSNILength := len(host) + 5
+	extSNI = append(extSNI, byte(extSNILength>>8), byte(extSNILength))
+	extSNILength2 := len(host) + 3
+	extSNI = append(extSNI, byte(extSNILength2>>8), byte(extSNILength2))
+	extSNI = append(extSNI, 0x00)
+	extSNILength3 := len(host)
+	extSNI = append(extSNI, byte(extSNILength3>>8), byte(extSNILength3))
+	extSNI = append(extSNI, host...)
+	return extSNI
+}
+
+// appLayerProtoNegotiation returns the Application Layer Protocol Negotiation extension
+//
+// jarmDetails[6] selects the protocol list ("RARE_APLN" or the default one)
+// and jarmDetails[8] the order, applied with cipherMung. Each entry is a
+// length byte followed by the protocol name.
+func appLayerProtoNegotiation(jarmDetails []string) []byte {
+	var ext []byte
+	ext = append(ext, 0x00, 0x10)
+	var alpns [][]byte
+
+	if jarmDetails[6] == "RARE_APLN" {
+		// http/0.9, http/1.0, spdy/1, spdy/2, spdy/3, h2c, hq
+		alpns = [][]byte{
+			{0x08, 0x68, 0x74, 0x74, 0x70, 0x2f, 0x30, 0x2e, 0x39},
+			{0x08, 0x68, 0x74, 0x74, 0x70, 0x2f, 0x31, 0x2e, 0x30},
+			{0x06, 0x73, 0x70, 0x64, 0x79, 0x2f, 0x31},
+			{0x06, 0x73, 0x70, 0x64, 0x79, 0x2f, 0x32},
+			{0x06, 0x73, 0x70, 0x64, 0x79, 0x2f, 0x33},
+			{0x03, 0x68, 0x32, 0x63},
+			{0x02, 0x68, 0x71},
+		}
+	} else {
+		// http/0.9, http/1.0, http/1.1, spdy/1, spdy/2, spdy/3, h2, h2c, hq
+		alpns = [][]byte{
+			{0x08, 0x68, 0x74, 0x74, 0x70, 0x2f, 0x30, 0x2e, 0x39},
+			{0x08, 0x68, 0x74, 0x74, 0x70, 0x2f, 0x31, 0x2e, 0x30},
+			{0x08, 0x68, 0x74, 0x74, 0x70, 0x2f, 0x31, 0x2e, 0x31},
+			{0x06, 0x73, 0x70, 0x64, 0x79, 0x2f, 0x31},
+			{0x06, 0x73, 0x70, 0x64, 0x79, 0x2f, 0x32},
+			{0x06, 0x73, 0x70, 0x64, 0x79, 0x2f, 0x33},
+			{0x02, 0x68, 0x32},
+			{0x03, 0x68, 0x32, 0x63},
+			{0x02, 0x68, 0x71},
+		}
+	}
+
+	if jarmDetails[8] != "FORWARD" {
+		alpns = cipherMung(alpns, jarmDetails[8])
+	}
+
+	var allAlpns []byte
+	for _, alpn := range alpns {
+		allAlpns = append(allAlpns, alpn...)
+	}
+
+	// The extension length covers the two-byte list length plus the list.
+	secondLength := len(allAlpns)
+	firstLength := secondLength + 2
+	ext = append(ext, byte(firstLength>>8), byte(firstLength))
+	ext = append(ext, byte(secondLength>>8), byte(secondLength))
+	ext = append(ext, allAlpns...)
+	return ext
+}
+
+// keyShare returns the Key Share extension
+func keyShare(grease bool) []byte {
+	var ext []byte
+	ext = append(ext, 0x00, 0x33)
+	var shareExt []byte
+
+	if grease {
+		shareExt = append(shareExt, chooseGrease()...)
+		shareExt = append(shareExt, 0x00, 0x01, 0x00)
+	}
+
+	shareExt = append(shareExt, 0x00, 0x1d, 0x00, 0x20)
+	shareExt = append(shareExt, randomBytes(32)...)
+ + secondLength := len(shareExt) + firstLength := secondLength + 2 + ext = append(ext, byte(firstLength>>8), byte(firstLength)) + ext = append(ext, byte(secondLength>>8), byte(secondLength)) + ext = append(ext, shareExt...) + + return ext +} + +// supportedVersions returns the Supported Versions extension +func supportedVersions(jarmDetails []string, grease bool) []byte { + var ext []byte + ext = append(ext, 0x00, 0x2b) + + var versions [][]byte + if jarmDetails[7] == tlsV12Support { + versions = [][]byte{ + {0x03, 0x01}, + {0x03, 0x02}, + {0x03, 0x03}, + } + } else { + versions = [][]byte{ + {0x03, 0x01}, + {0x03, 0x02}, + {0x03, 0x03}, + {0x03, 0x04}, + } + } + + if jarmDetails[8] != "FORWARD" { + versions = cipherMung(versions, jarmDetails[8]) + } + + var allVersions []byte + if grease { + allVersions = append(allVersions, chooseGrease()...) + } + for _, version := range versions { + allVersions = append(allVersions, version...) + } + + secondLength := len(allVersions) + firstLength := secondLength + 1 + ext = append(ext, byte(firstLength>>8), byte(firstLength)) + ext = append(ext, byte(secondLength)) + ext = append(ext, allVersions...) 
+
+	// Debug print to match Python format
+	cmn.DebugMsg(cmn.DbgLvlDebug3, "supported_versions: %s\n", formatForPython(ext))
+	return ext
+}
+
+// sendPacket opens a TCP connection to host:port (through jc.Proxy when one
+// is set), writes packet and returns the first chunk of the reply, at most
+// 1484 bytes. Dial, deadline, write and read failures are returned wrapped so
+// callers can inspect the underlying error.
+func (jc JARMCollector) sendPacket(packet []byte, host string, port string) ([]byte, error) {
+	address := net.JoinHostPort(host, port)
+
+	var conn net.Conn
+	if jc.Proxy != nil {
+		proxyURL, err := url.Parse(jc.Proxy.Address)
+		if err != nil {
+			return nil, fmt.Errorf("proxy parse error: %w", err)
+		}
+
+		dialer, err := proxy.FromURL(proxyURL, proxy.Direct)
+		if err != nil {
+			return nil, fmt.Errorf("proxy error: %w", err)
+		}
+		conn, err = dialer.Dial("tcp", address)
+		if err != nil {
+			return nil, fmt.Errorf("proxy dial error: %w", err)
+		}
+	} else {
+		// Connect directly if no proxy is provided
+		var err error
+		conn, err = net.DialTimeout("tcp", address, 20*time.Second)
+		if err != nil {
+			return nil, fmt.Errorf("direct dial error: %w", err)
+		}
+	}
+	defer conn.Close()
+
+	// One deadline covers both the write and the read below.
+	if err := conn.SetDeadline(time.Now().Add(20 * time.Second)); err != nil {
+		return nil, fmt.Errorf("set deadline error: %w", err)
+	}
+
+	// Send packet
+	if _, err := conn.Write(packet); err != nil {
+		return nil, fmt.Errorf("write packet error: %w", err)
+	}
+
+	// Receive server hello: a single Read, so only the first chunk is returned.
+	buff := make([]byte, 1484)
+	n, err := conn.Read(buff)
+	if err != nil {
+		if err == io.EOF {
+			return nil, fmt.Errorf("connection closed by peer")
+		}
+		return nil, fmt.Errorf("read packet error: %w", err)
+	}
+
+	return buff[:n], nil
+}
+
+// readPacket parses a raw reply and returns "cipher|version|alpn|extensions",
+// with cipher and version hex encoded. It returns "|||" when the reply is
+// nil, too short to hold the fields read below, an alert record (type 21), or
+// anything other than a handshake record (type 22) carrying a ServerHello
+// (handshake type 2). The second parameter (jarmDetails) is currently unused.
+func readPacket(data []byte, _ []string) string {
+	const emptyResult = "|||"
+
+	// Offsets used below: record type at 0, record length at 3..4, handshake
+	// type at 5, version at 9..10, session ID length at 43. Anything shorter
+	// than 44 bytes cannot be parsed, so treat it like a failed probe instead
+	// of indexing out of range.
+	if len(data) < 44 {
+		return emptyResult
+	}
+	if data[0] == 21 {
+		return emptyResult
+	}
+	if data[0] != 22 || data[5] != 2 {
+		return emptyResult
+	}
+
+	serverHelloLength := int(binary.BigEndian.Uint16(data[3:5]))
+	counter := int(data[43])
+
+	// The selected cipher follows the variable-length session ID.
+	if len(data) < counter+46 {
+		return emptyResult
+	}
+	selectedCipher := data[counter+44 : counter+46]
+	version := data[9:11]
+
+	var jarm strings.Builder
+	jarm.WriteString(hex.EncodeToString(selectedCipher))
+	jarm.WriteString("|")
+	jarm.WriteString(hex.EncodeToString(version))
+	jarm.WriteString("|")
+	jarm.WriteString(extractExtensionInfo(data, counter, serverHelloLength))
+	return jarm.String()
+}
+
+// extractExtensionInfo returns "alpn|type-type-..." for the extensions of the
+// ServerHello in data, where counter is the session ID length. It returns "|"
+// when the reply carries no usable extension block or when the block is
+// truncated or malformed, so hostile or partial replies cannot cause an
+// out-of-range panic.
+func extractExtensionInfo(data []byte, counter int, serverHelloLength int) string {
+	const noExtensions = "|"
+
+	// The two-byte extensions length lives at counter+47..counter+48.
+	if counter < 0 || len(data) < counter+49 {
+		return noExtensions
+	}
+	if data[counter+47] == 11 || counter+42 >= serverHelloLength {
+		return noExtensions
+	}
+	if len(data) >= counter+53 && bytes.Equal(data[counter+50:counter+53], []byte{0x0e, 0xac, 0x0b}) {
+		return noExtensions
+	}
+
+	count := counter + 49
+	length := int(binary.BigEndian.Uint16(data[counter+47 : counter+49]))
+	maximum := length + count - 1
+	var types [][]byte
+	var values [][]byte
+
+	for count < maximum {
+		// Each extension is a 2-byte type, a 2-byte length and the value.
+		if count+4 > len(data) {
+			return noExtensions
+		}
+		extLength := int(binary.BigEndian.Uint16(data[count+2 : count+4]))
+		end := count + 4 + extLength
+		if end > len(data) {
+			return noExtensions
+		}
+		types = append(types, data[count:count+2])
+		values = append(values, data[count+4:end])
+		count = end
+	}
+
+	var result strings.Builder
+	result.WriteString(findExtension([]byte{0x00, 0x10}, types, values))
+	result.WriteString("|")
+
+	for i, t := range types {
+		if i > 0 {
+			result.WriteString("-")
+		}
+		result.WriteString(hex.EncodeToString(t))
+	}
+
+	return result.String()
+}
+
+// findExtension returns the value of the first extension in types equal to
+// extType. For type 0x0010 the first three bytes of the value are skipped and
+// the remainder is returned as text; any other type is returned hex encoded.
+// It returns "" when the type is absent or the value is too short.
+func findExtension(extType []byte, types [][]byte, values [][]byte) string {
+	isALPN := bytes.Equal(extType, []byte{0x00, 0x10})
+	for i, t := range types {
+		if i >= len(values) {
+			break
+		}
+		if !bytes.Equal(t, extType) {
+			continue
+		}
+		if isALPN {
+			if len(values[i]) < 3 {
+				return ""
+			}
+			return string(values[i][3:])
+		}
+		return hex.EncodeToString(values[i])
+	}
+	return ""
+}
+
+// toBytes returns the low 16 bits of i as two bytes, most significant first.
+func toBytes(i int) []byte {
+	return []byte{byte(i >> 8), byte(i)}
+}
+
+func randomBytes(n int) []byte {
+	b := make([]byte, n)
+
_, err := rand.Read(b) + if err != nil { + fmt.Println("error:", err) + return nil + } + return b +} + +// chooseGrease returns a GREASE value +func chooseGrease() []byte { + greaseList := [][]byte{ + {0x0a, 0x0a}, {0x1a, 0x1a}, {0x2a, 0x2a}, {0x3a, 0x3a}, {0x4a, 0x4a}, + {0x5a, 0x5a}, {0x6a, 0x6a}, {0x7a, 0x7a}, {0x8a, 0x8a}, {0x9a, 0x9a}, + {0xaa, 0xaa}, {0xba, 0xba}, {0xca, 0xca}, {0xda, 0xda}, {0xea, 0xea}, + {0xfa, 0xfa}, + } + + // Use crypto/rand equivalent of math/rand rand.Intn + x := io.Reader(rand.Reader) + y := big.NewInt(int64(len(greaseList))) + n, err := rand.Int(x, y) + if err != nil { + fmt.Println("error:", err) + return greaseList[0] // return a default value in case of error + } + + idx := int(n.Int64()) + return greaseList[idx] +} diff --git a/pkg/httpinfo/jarm_collector_test.go b/pkg/httpinfo/jarm_collector_test.go new file mode 100644 index 00000000..349e1de5 --- /dev/null +++ b/pkg/httpinfo/jarm_collector_test.go @@ -0,0 +1,35 @@ +package httpinfo + +import ( + "os" + "testing" +) + +// TestJARMCollector_Collect tests the Collect method of the JARMCollector. 
+func TestJARMCollector_Collect(t *testing.T) { + if os.Getenv("GITHUB_ACTIONS") == "true" { + t.Skip("Skipping this test in GitHub Actions.") + } + + jc := JARMCollector{ + Proxy: nil, // Set the proxy configuration if needed + } + + host := "example.com" + port := "443" + + jarm, err := jc.Collect(host, port) + + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + + // Print JARM + t.Logf("JARM: %s", jarm) + + // Add assertions to validate the JARM fingerprint + // For example: + // if jarm != "expected_jarm" { + // t.Errorf("Expected JARM: %s, got: %s", "expected_jarm", jarm) + // } +} diff --git a/pkg/httpinfo/sslinfo.go b/pkg/httpinfo/sslinfo.go index 7fb2c747..ed620278 100644 --- a/pkg/httpinfo/sslinfo.go +++ b/pkg/httpinfo/sslinfo.go @@ -30,6 +30,7 @@ import ( "time" cmn "github.com/pzaino/thecrowler/pkg/common" + fingerprints "github.com/pzaino/thecrowler/pkg/fingerprints" "golang.org/x/crypto/ocsp" ) @@ -100,6 +101,121 @@ func getTimeInCertReportFormat() string { } */ +func (ssl *SSLInfo) CollectSSLData(url string, port string, c *Config) error { + if ssl == nil { + return fmt.Errorf("SSLInfo is nil") + } + + // Collect all necessary data once + dc := DataCollector{} + collectedData, err := dc.CollectAll(url, port, c) + if err != nil { + return err + } + + // Check if the TLSCertificates are empty + if len(collectedData.TLSCertificates) == 0 { + return fmt.Errorf("no certificates found") + } + + // Extract the certificate information + ssl.CertChain = collectedData.TLSCertificates + + // Get all fingerprints + ssl.Fingerprints = make(map[string]string) + getFingerprints(ssl, collectedData, c) + + return nil +} + +/* + // Collect JARM fingerprint + collector := JARMCollector{} + fingerprint := fingerprints.JARM{} + + // Check if the URL has a port number, if so, extract the port number + url := config.URL + port := "" + // first let's remove the scheme + if strings.HasPrefix(url, "http") { + url = strings.Replace(url, "http://", "", 1) + url = 
strings.Replace(url, "https://", "", 1) + port = "443" + } else if strings.HasPrefix(url, "ftp") { + url = strings.Replace(url, "ftp://", "", 1) + url = strings.Replace(url, "ftps://", "", 1) + port = "21" + } else if strings.HasPrefix(url, "ws") { + url = strings.Replace(url, "ws://", "", 1) + url = strings.Replace(url, "wss://", "", 1) + port = "80" + } + // now let's check if there is a port number + if strings.Contains(url, ":") { + // extract the port number + port = strings.Split(url, ":")[1] + // remove the port number from the URL + url = strings.Split(url, ":")[0] + } + + // Collect the handshake data + data, err := collector.Collect(url, port) + skipJARM := false + if err != nil { + cmn.DebugMsg(cmn.DbgLvlDebug1, "Error converting SSL info to details: %v", err) + skipJARM = true + } + + // Compute the JARM fingerprint + if !skipJARM { + jarm := fingerprint.Compute(data) + info.Fingerprints["JARM"] = jarm + } +*/ + +func getFingerprints(ssl *SSLInfo, collectedData *CollectedData, c *Config) { + // Compute all fingerprints + if c.SSLDiscovery.CityHash { + ssl.Fingerprints["CityHash"] = ComputeCityHash(collectedData) + } + if c.SSLDiscovery.SHA256 { + ssl.Fingerprints["SHA256"] = ComputeSHA256(collectedData) + } + if c.SSLDiscovery.BLAKE2 { + ssl.Fingerprints["BLAKE2"] = ComputeBLAKE2(collectedData) + } + if c.SSLDiscovery.MurmurHash { + ssl.Fingerprints["MurmurHash"] = ComputeMurmurHash(collectedData) + } + if c.SSLDiscovery.TLSH { + ssl.Fingerprints["TLSH"] = ComputeTLSH(collectedData) + } + if c.SSLDiscovery.SimHash { + ssl.Fingerprints["SimHash"] = ComputeSimHash(collectedData) + } + if c.SSLDiscovery.MinHash { + ssl.Fingerprints["MinHash"] = ComputeMinHash(collectedData) + } + if c.SSLDiscovery.JA3 { + ssl.Fingerprints["JA3"] = ComputeJA3(collectedData) + } + if c.SSLDiscovery.JA3S { + ssl.Fingerprints["JA3S"] = ComputeJA3S(collectedData) + } + if c.SSLDiscovery.HASSH { + ssl.Fingerprints["HASSH"] = ComputeHASSH(collectedData) + } + if 
c.SSLDiscovery.HASSHServer { + ssl.Fingerprints["HASSHServer"] = ComputeHASSHServer(collectedData) + } + if c.SSLDiscovery.CustomTLS { + ssl.Fingerprints["CustomTLS"] = ComputeCustomTLS(collectedData) + } + if c.SSLDiscovery.JARM { + ssl.Fingerprints["JARM"] = ComputeJARM(collectedData) + } +} + func (ssl *SSLInfo) GetSSLInfo(url string, port string) error { // Get the certificate from the server var err error @@ -755,3 +871,77 @@ func listIntermediateAuthorities(certChain []*x509.Certificate) ([]string, error return intermediateAuthorities, nil } + +func ComputeJA3(data *CollectedData) string { + ja3 := fingerprints.JA3{} + return ja3.Compute(string(data.RawClientHello)) +} + +func ComputeJA3S(data *CollectedData) string { + ja3s := fingerprints.JA3S{} + return ja3s.Compute(string(data.RawServerHello)) +} + +func ComputeHASSH(data *CollectedData) string { + hassh := fingerprints.HASSH{} + return hassh.Compute(string(data.SSHClientHello)) +} + +func ComputeHASSHServer(data *CollectedData) string { + hasshServer := fingerprints.HASSHServer{} + return hasshServer.Compute(string(data.SSHServerHello)) +} + +func ComputeTLSH(data *CollectedData) string { + tlsh := fingerprints.TLSH{} + content := string(data.RawClientHello) + string(data.RawServerHello) + return tlsh.Compute(content) +} + +func ComputeSimHash(data *CollectedData) string { + simhash := fingerprints.SimHash{} + content := string(data.RawClientHello) + string(data.RawServerHello) + return simhash.Compute(content) +} + +func ComputeMinHash(data *CollectedData) string { + minhash := fingerprints.MinHash{} + content := string(data.RawClientHello) + string(data.RawServerHello) + return minhash.Compute(content) +} + +func ComputeBLAKE2(data *CollectedData) string { + blake2 := fingerprints.BLAKE2{} + content := string(data.RawClientHello) + return blake2.Compute(content) +} + +func ComputeSHA256(data *CollectedData) string { + sha256 := fingerprints.SHA256{} + content := string(data.RawClientHello) + return 
sha256.Compute(content) +} + +func ComputeCityHash(data *CollectedData) string { + cityHash := fingerprints.CityHash{} + content := string(data.RawClientHello) + return cityHash.Compute(content) +} + +func ComputeMurmurHash(data *CollectedData) string { + murmurHash := fingerprints.MurmurHash{} + content := string(data.RawClientHello) + return murmurHash.Compute(content) +} + +func ComputeCustomTLS(data *CollectedData) string { + customTLS := fingerprints.CustomTLS{} + content := string(data.RawClientHello) + return customTLS.Compute(content) +} + +func ComputeJARM(data *CollectedData) string { + jarm := fingerprints.JARM{} + content := data.JARMFingerprint + return jarm.Compute(content) +} diff --git a/pkg/httpinfo/tls_collector.go b/pkg/httpinfo/tls_collector.go new file mode 100644 index 00000000..b3ef2be0 --- /dev/null +++ b/pkg/httpinfo/tls_collector.go @@ -0,0 +1,223 @@ +// Copyright 2023 Paolo Fabio Zaino +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+// Package httpinfo provides functionality to extract HTTP header and SSL/TLS information
+package httpinfo
+
+import (
+	"bytes"
+	"crypto/tls"
+	"fmt"
+	"io"
+	"net"
+	"net/url"
+	"time"
+
+	cmn "github.com/pzaino/thecrowler/pkg/common"
+	cfg "github.com/pzaino/thecrowler/pkg/config"
+
+	"golang.org/x/crypto/ssh"
+	"golang.org/x/net/proxy"
+)
+
+// captureConn wraps a net.Conn and routes Read and Write through r and w, so
+// a caller can tee the traffic of a handshake into buffers. Every other
+// net.Conn method is served by the embedded connection.
+type captureConn struct {
+	net.Conn
+	r io.Reader
+	w io.Writer
+}
+
+// Read reads from r and reports io.EOF when no reader is configured.
+func (c *captureConn) Read(b []byte) (int, error) {
+	if c.r == nil {
+		return 0, io.EOF
+	}
+	return c.r.Read(b)
+}
+
+// Write writes to w and fails when no writer is configured.
+func (c *captureConn) Write(b []byte) (int, error) {
+	if c.w == nil {
+		// Nothing was written, so report zero bytes alongside the error.
+		return 0, fmt.Errorf("write not supported")
+	}
+	return c.w.Write(b)
+}
+
+// DataCollector collects handshake data from a remote endpoint, optionally
+// connecting through the SOCKS proxy in Proxy.
+type DataCollector struct {
+	Proxy *cfg.SOCKSProxy
+}
+
+// dial opens a TCP connection to host:port. When dc.Proxy has an address the
+// connection goes through that proxy, using its credentials if a username is
+// set; otherwise it connects directly with a 10 second timeout.
+func (dc DataCollector) dial(host, port string) (net.Conn, error) {
+	address := net.JoinHostPort(host, port)
+	if dc.Proxy != nil && dc.Proxy.Address != "" {
+		proxyURL, err := url.Parse(dc.Proxy.Address)
+		if err != nil {
+			return nil, err
+		}
+
+		if dc.Proxy.Username != "" {
+			proxyURL.User = url.UserPassword(dc.Proxy.Username, dc.Proxy.Password)
+		}
+
+		dialer, err := proxy.FromURL(proxyURL, proxy.Direct)
+		if err != nil {
+			return nil, err
+		}
+
+		return dialer.Dial("tcp", address)
+	}
+
+	return net.DialTimeout("tcp", address, 10*time.Second)
+}
+
+// CollectAll performs a TLS handshake with host:port and returns the
+// connection state, the peer certificates and the raw bytes exchanged during
+// the handshake: RawClientHello holds what this side wrote (starting with the
+// ClientHello record) and RawServerHello holds what the peer sent back.
+// When c enables them, the JARM fingerprint and the SSH handshake data are
+// collected as well. A nil c is allowed and skips the optional collectors.
+// Dial or handshake failures return a nil result; a failure in an optional
+// collector returns the data gathered so far together with the error.
+func (dc DataCollector) CollectAll(host string, port string, c *Config) (*CollectedData, error) {
+	// Same budget dial uses for a direct connection; it keeps an unresponsive
+	// peer from stalling the handshake indefinitely.
+	const handshakeTimeout = 10 * time.Second
+
+	collectedData := &CollectedData{}
+
+	// Set the proxy if it is defined.
+	// NOTE(review): with two or more proxies the second entry is used, as the
+	// original code did - confirm this is intended.
+	var socksProxy *cfg.SOCKSProxy
+	if c != nil && len(c.Proxies) > 0 {
+		if len(c.Proxies) > 1 {
+			socksProxy = &c.Proxies[1]
+		} else {
+			socksProxy = &c.Proxies[0]
+		}
+	}
+	if socksProxy != nil {
+		dc.Proxy = socksProxy
+	}
+
+	// Dial the server
+	rawConn, err := dc.dial(host, port)
+	if err != nil {
+		return nil, err
+	}
+	defer rawConn.Close()
+
+	if err = rawConn.SetDeadline(time.Now().Add(handshakeTimeout)); err != nil {
+		return nil, fmt.Errorf("set deadline error: %w", err)
+	}
+
+	// Tee both directions of the handshake: bytes written by this side are
+	// the client messages, bytes read from the peer are the server messages.
+	var clientHelloBuf, serverHelloBuf bytes.Buffer
+	tapped := &captureConn{
+		Conn: rawConn,
+		r:    io.TeeReader(rawConn, &serverHelloBuf),
+		w:    io.MultiWriter(rawConn, &clientHelloBuf),
+	}
+
+	// Perform the TLS handshake. ServerName makes the client send SNI so a
+	// virtual host presents its own certificate rather than the default one.
+	// Verification stays disabled as in the original code, presumably so any
+	// certificate the peer presents can be inspected.
+	conn := tls.Client(tapped, &tls.Config{
+		ServerName:         host,
+		InsecureSkipVerify: true,
+	})
+	if err = conn.Handshake(); err != nil {
+		return nil, err
+	}
+
+	// Collect TLS handshake state and peer certificates
+	state := conn.ConnectionState()
+	collectedData.TLSHandshakeState = state
+	collectedData.TLSCertificates = state.PeerCertificates
+
+	// Store the captured handshake bytes of each side
+	collectedData.RawClientHello = clientHelloBuf.Bytes()
+	collectedData.RawServerHello = serverHelloBuf.Bytes()
+
+	// Without a configuration there are no optional collectors to run.
+	if c == nil {
+		return collectedData, nil
+	}
+
+	// Collect JARM fingerprint
+	if c.SSLDiscovery.JARM {
+		jarmCollector := JARMCollector{}
+		if socksProxy != nil {
+			jarmCollector.Proxy = socksProxy
+		}
+		jarmFingerprint, jarmErr := jarmCollector.Collect(host, port)
+		if jarmErr != nil {
+			return collectedData, jarmErr
+		}
+		collectedData.JARMFingerprint = jarmFingerprint
+		cmn.DebugMsg(cmn.DbgLvlDebug5, "JARM collected Fingerprint: %s", jarmFingerprint)
+	}
+
+	// Collect SSH data
+	if c.SSHDiscovery {
+		if err = dc.CollectSSH(collectedData, host, port); err != nil {
+			return collectedData, err
+		}
+	}
+
+	return collectedData, nil
+}
+
+// captureServerHello reads from conn until EOF or until a 5 second read
+// deadline expires and returns whatever was received. Reading from a tls.Conn
+// yields decrypted post-handshake data, not the handshake records, which is
+// why CollectAll records the handshake through captureConn instead. It
+// returns nil for a nil conn or when the read fails for any reason other
+// than the deadline.
+func captureServerHello(conn *tls.Conn) []byte {
+	if conn == nil {
+		return nil
+	}
+	// A peer that is waiting for a request never sends EOF; bound the read so
+	// this cannot block forever.
+	if err := conn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
+		return nil
+	}
+
+	var serverHelloBuf bytes.Buffer
+	if _, err := io.Copy(&serverHelloBuf, conn); err != nil {
+		if netErr, ok := err.(net.Error); !ok || !netErr.Timeout() {
+			return nil
+		}
+	}
+	return serverHelloBuf.Bytes()
+}
+
+func (dc DataCollector) CollectSSH(collectedData *CollectedData, host string, port string) error {
+	// Buffers to capture the SSH handshake
+	var clientHelloBuf, serverHelloBuf bytes.Buffer
+
+	// Dial the SSH server
+	conn, err := dc.dial(host, port)
+	if err != nil {
+		return err
+	}
+	defer
conn.Close() + + // Create SSH client config + clientConfig := &ssh.ClientConfig{ + User: "user", + HostKeyCallback: ssh.InsecureIgnoreHostKey(), + } + + // Wrap the connection to capture the ClientHello and ServerHello messages + clientHelloCapture := io.TeeReader(conn, &clientHelloBuf) + serverHelloCapture := io.MultiWriter(&serverHelloBuf, conn) + captureConn := &captureConn{Conn: conn, r: clientHelloCapture, w: serverHelloCapture} + + // Perform the SSH handshake + sshConn, newChannels, requests, err := ssh.NewClientConn(captureConn, host, clientConfig) + if err != nil { + return err + } + defer sshConn.Close() + + // Store captured SSH ClientHello and ServerHello messages + collectedData.SSHClientHello = clientHelloBuf.Bytes() + collectedData.SSHServerHello = serverHelloBuf.Bytes() + + // Handle channels and requests (necessary for SSH connection) + go ssh.DiscardRequests(requests) + go handleSSHChannels(newChannels) + + return nil +} + +func handleSSHChannels(channels <-chan ssh.NewChannel) { + for newChannel := range channels { + channel, requests, err := newChannel.Accept() + if err != nil { + continue + } + go ssh.DiscardRequests(requests) + channel.Close() + } +} diff --git a/pkg/httpinfo/types.go b/pkg/httpinfo/types.go index edcce331..d601c21f 100644 --- a/pkg/httpinfo/types.go +++ b/pkg/httpinfo/types.go @@ -16,6 +16,7 @@ package httpinfo import ( + "crypto/tls" "crypto/x509" "encoding/base64" "encoding/pem" @@ -25,6 +26,7 @@ import ( "strings" cmn "github.com/pzaino/thecrowler/pkg/common" + cfg "github.com/pzaino/thecrowler/pkg/config" ) // Config is a struct to specify the configuration for header extraction @@ -34,7 +36,9 @@ type Config struct { FollowRedirects bool Timeout int SSLMode string - SSLDiscovery bool + SSLDiscovery cfg.SSLScoutConfig + SSHDiscovery bool + Proxies []cfg.SOCKSProxy // SOCKS proxies } // HTTPDetails is a struct to store the collected HTTP header information @@ -135,37 +139,52 @@ type SSLInfo struct { IsCertEVSGCCodeSigning 
bool `json:"is_cert_ev_sgc_ca_code_signing_ev"` IsCertEVSGCCodeSigningSSL bool `json:"is_cert_ev_sgc_ca_code_signing_ev_ssl"` CertExpiration cmn.FlexibleDate `json:"cert_expiration"` + Fingerprints map[string]string `json:"fingerprints"` } // SSLDetails is identical to SSLInfo, however it is designed to be easy to unmarshal/marshal // from/to JSON, so it's used to store data on the DB and return data from requests. type SSLDetails struct { - URL string `json:"url"` - Issuers []string `json:"issuers"` // List of issuers - OwnerOrganizations []string `json:"owner_organizations"` // Organizations - OwnerOrganizationalUnits []string `json:"owner_organizational_units"` // Organizational Units - OwnerCountries []string `json:"owner_countries"` // Countries - OwnerStates []string `json:"owner_states"` // States - OwnerLocalities []string `json:"owner_localities"` // Localities - OwnerCommonNames []string `json:"owner_common_names"` // Common Names - FQDNs []string `json:"fqdns"` // List of FQDNs the certificate is valid for - PublicKeys []string `json:"public_keys"` // Public key info, possibly base64-encoded - SignatureAlgorithms []string `json:"signature_algorithms"` // Signature algorithms used - CertChains []CertChain `json:"cert_chain"` // Base64-encoded certificates - IsCertChainOrderValid bool `json:"is_cert_chain_order_valid"` - IsRootTrustworthy bool `json:"is_root_trustworthy"` - IsCertValid bool `json:"is_cert_valid"` - IsCertExpired bool `json:"is_cert_expired"` - IsCertRevoked bool `json:"is_cert_revoked"` - IsCertSelfSigned bool `json:"is_cert_self_signed"` - IsCertCA bool `json:"is_cert_ca"` - IsCertIntermediate bool `json:"is_cert_intermediate"` - IsCertLeaf bool `json:"is_cert_leaf"` - IsCertTrusted bool `json:"is_cert_trusted"` - IsCertTechnicallyConstrained bool `json:"is_cert_technically_constrained"` - IsCertEV bool `json:"is_cert_ev"` - IsCertEVSSL bool `json:"is_cert_ev_ssl"` - CertExpiration string `json:"cert_expiration"` // Use string to simplify 
+ URL string `json:"url"` + Issuers []string `json:"issuers"` // List of issuers + OwnerOrganizations []string `json:"owner_organizations"` // Organizations + OwnerOrganizationalUnits []string `json:"owner_organizational_units"` // Organizational Units + OwnerCountries []string `json:"owner_countries"` // Countries + OwnerStates []string `json:"owner_states"` // States + OwnerLocalities []string `json:"owner_localities"` // Localities + OwnerCommonNames []string `json:"owner_common_names"` // Common Names + FQDNs []string `json:"fqdns"` // List of FQDNs the certificate is valid for + PublicKeys []string `json:"public_keys"` // Public key info, possibly base64-encoded + SignatureAlgorithms []string `json:"signature_algorithms"` // Signature algorithms used + CertChains []CertChain `json:"cert_chain"` // Base64-encoded certificates + IsCertChainOrderValid bool `json:"is_cert_chain_order_valid"` + IsRootTrustworthy bool `json:"is_root_trustworthy"` + IsCertValid bool `json:"is_cert_valid"` + IsCertExpired bool `json:"is_cert_expired"` + IsCertRevoked bool `json:"is_cert_revoked"` + IsCertSelfSigned bool `json:"is_cert_self_signed"` + IsCertCA bool `json:"is_cert_ca"` + IsCertIntermediate bool `json:"is_cert_intermediate"` + IsCertLeaf bool `json:"is_cert_leaf"` + IsCertTrusted bool `json:"is_cert_trusted"` + IsCertTechnicallyConstrained bool `json:"is_cert_technically_constrained"` + IsCertEV bool `json:"is_cert_ev"` + IsCertEVSSL bool `json:"is_cert_ev_ssl"` + CertExpiration string `json:"cert_expiration"` // Use string to simplify + Fingerprints map[string]string `json:"fingerprints,omitempty"` +} + +// CollectedData is a struct to store the collected data from a TLS handshake +type CollectedData struct { + TLSClientHello []byte + TLSClientHelloInfo *tls.ClientHelloInfo + TLSHandshakeState tls.ConnectionState + TLSCertificates []*x509.Certificate + RawClientHello []byte + RawServerHello []byte + SSHClientHello []byte + SSHServerHello []byte + JARMFingerprint string 
} // CertChain is a struct to store the base64-encoded certificate chain @@ -260,6 +279,7 @@ func ConvertSSLInfoToDetails(info SSLInfo) (SSLDetails, error) { IsCertEV: info.IsCertEV, IsCertEVSSL: info.IsCertEVSSL, CertExpiration: info.CertExpiration.String(), + Fingerprints: info.Fingerprints, }, nil } diff --git a/schemas/ruleset-schema.json b/schemas/ruleset-schema.json index 3af53ee1..ae431f7a 100644 --- a/schemas/ruleset-schema.json +++ b/schemas/ruleset-schema.json @@ -434,7 +434,7 @@ "items": { "type": "string" }, - "description": "The expected value of the HTTP header field. You can start your micro-signature using ^ to match the beginning of the value. Start it with $ to match the end of the value. Start it with ! to exclude the value (so assign the confidence if the there isn't a match). This value is NOT a regex pattern, but a simple string pattern. Use '*' to match any value." + "description": "The expected value of the HTTP header field. You can use Perl-Compatible Regular Expressions (PCRE) to write your signatures and patterns." }, "confidence": { "type": "number", @@ -462,11 +462,11 @@ "items": { "type": "string" }, - "description": "The pattern to match within the tag's attribute content. You can start your micro-signature using ^ to match the beginning of the value. Start it with $ to match the end of the value. Start it with ! to exclude the value (so assign the confidence if the there isn't a match). This value is NOT a regex pattern, but a simple string pattern. Use '*' to match any value." + "description": "The pattern to match within the tag's attribute content. You can use Perl-Compatible Regular Expressions (PCRE) to write your signatures and patterns." }, "text": { "type": "string", - "description": "Optional. The text to match in the tag's innerText. You can start your micro-signature using ^ to match the beginning of the value. Start it with $ to match the end of the value. Start it with ! 
to exclude the value (so assign the confidence if the there isn't a match). This value is NOT a regex pattern, but a simple string pattern. Use '*' to match any value." + "description": "Optional. The text to match in the tag's innerText. You can use Perl-Compatible Regular Expressions (PCRE) to write your signatures and patterns." }, "confidence": { "type": "number", @@ -491,7 +491,7 @@ "items": { "type": "string" }, - "description": "The pattern to match within the field's value. You can start your micro-signature using ^ to match the beginning of the value. Start it with $ to match the end of the value. Start it with ! to exclude the value (so assign the confidence if the there isn't a match). This value is NOT a regex pattern, but a simple string pattern. Use '*' to match any value." + "description": "The pattern to match within the field's value. You can use Perl-Compatible Regular Expressions (PCRE) to write your signatures and patterns." }, "confidence": { "type": "number", @@ -508,7 +508,7 @@ "properties": { "value": { "type": "string", - "description": "The micro-signature to match in the URL. This is NOT a regex pattern, but a simple string pattern." + "description": "The micro-signature to match in the URL. You can use Perl-Compatible Regular Expressions (PCRE) to write your signatures and patterns." }, "confidence": { "type": "number", @@ -537,7 +537,7 @@ }, "content": { "type": "string", - "description": "The content attribute of the meta tag, which holds the value to match. You can start your micro-signature using ^ to match the beginning of the value. Start it with $ to match the end of the value. Start it with ! to exclude the value (so assign the confidence if the there isn't a match). This value is NOT a regex pattern, but a simple string pattern. Use '*' to match any value." + "description": "The content attribute of the meta tag, which holds the value to match. 
You can use Perl-Compatible Regular Expressions (PCRE) to write your signatures and patterns." } } }, diff --git a/schemas/ruleset-schema.yaml b/schemas/ruleset-schema.yaml index d522c321..f785ffa1 100644 --- a/schemas/ruleset-schema.yaml +++ b/schemas/ruleset-schema.yaml @@ -335,7 +335,7 @@ items: type: "array" items: type: "string" - description: "The expected value of the HTTP header field. You can start your micro-signature using ^ to match the beginning of the value. Start it with $ to match the end of the value. Start it with ! to exclude the value (so assign the confidence if the there isn't a match). This value is NOT a regex pattern, but a simple string pattern. Use '*' to match any value." + description: "The expected value of the HTTP header field. You can use Perl-Compatible Regular Expressions (PCRE) to write your signatures and patterns." confidence: type: "number" description: "Optional. The confidence level for the match, ranging from 0 to 10." @@ -355,10 +355,10 @@ items: type: "array" items: type: "string" - description: "The pattern to match within the tag's attribute content. You can start your micro-signature using ^ to match the beginning of the value. Start it with $ to match the end of the value. Start it with ! to exclude the value (so assign the confidence if the there isn't a match). This value is NOT a regex pattern, but a simple string pattern. Use '*' to match any value." + description: "The pattern to match within the tag's attribute content. You can use Perl-Compatible Regular Expressions (PCRE) to write your signatures and patterns." text: type: "string" - description: "Optional. The text to match in the tag's innerText. You can start your micro-signature using ^ to match the beginning of the value. Start it with $ to match the end of the value. Start it with ! to exclude the value (so assign the confidence if the there isn't a match). This value is NOT a regex pattern, but a simple string pattern. Use '*' to match any value." 
+ description: "Optional. The text to match in the tag's innerText. You can use Perl-Compatible Regular Expressions (PCRE) to write your signatures and patterns." confidence: type: "number" description: "Optional. The confidence level for the detection, decimal number ranging from 0 to 10 (or whatever set in the detection_configuration)." @@ -376,7 +376,7 @@ items: type: "array" items: type: "string" - description: "The pattern to match within the field's value. You can start your micro-signature using ^ to match the beginning of the value. Start it with $ to match the end of the value. Start it with ! to exclude the value (so assign the confidence if the there isn't a match). This value is NOT a regex pattern, but a simple string pattern. Use '*' to match any value." + description: "The pattern to match within the field's value. You can use Perl-Compatible Regular Expressions (PCRE) to write your signatures and patterns." confidence: type: "number" description: "Optional. The confidence level for the detection, decimal number ranging from 0 to 10 (or whatever set in the detection_configuration)." @@ -388,7 +388,7 @@ items: properties: value: type: "string" - description: "The micro-signature to match in the URL. This is NOT a regex pattern, but a simple string pattern." + description: "The micro-signature to match in the URL. You can use Perl-Compatible Regular Expressions (PCRE) to write your signatures and patterns." confidence: type: "number" description: "Optional. The confidence level for the match, decimal number ranging from 0 to 10 (or whatever set in the detection_configuration)." @@ -409,7 +409,7 @@ items: description: "The name attribute of the meta tag." content: type: "string" - description: "The content attribute of the meta tag, which holds the value to match. You can start your micro-signature using ^ to match the beginning of the value. Start it with $ to match the end of the value. Start it with ! 
to exclude the value (so assign the confidence if the there isn't a match). This value is NOT a regex pattern, but a simple string pattern. Use '*' to match any value." + description: "The content attribute of the meta tag, which holds the value to match. You can use Perl-Compatible Regular Expressions (PCRE) to write your signatures and patterns." description: "Matching patterns for meta tags to identify technology." required: - "rule_name"