Is there an explanation anywhere of what the score field in the Sophos static/dynamic file analysis report means?
The schema simply states: Maliciousness score of the analyzed file (0 = malicious, 100 = benign).
I expected this to be interpreted the same way as the file hash lookup reputationScore:
The following ranges are defined:
[0-19]: Malware
[20-29]: PUA (potentially unwanted application)
[30-69]: Unknown/suspicious
[70-100]: Known good
However, I received a score of 10, which would indicate malware, even though I submitted a safe PDF file. This seems unexpected.
Does Sophos consider the file malicious if it returns a report score of 10 for static file analysis?
This is the response from Sophos:
{
"jobId": "3aee2c04a73bb64b3572271389cc2e95",
"jobStatus": "SUCCESS",
"report": {
"analysis_subject": {
"mime_type": "application/pdf",
"sha1": "5b03ccec77b416805d6d8e270d33942aaedcc6dd",
"sha256": "f6edcd8a1b4f7cb85486d0c6777f9174eadbc4d1d0d9e5aeba7132f30b34bc3e"
},
"analysis_summary": [
{
"description": "Document contains links to external domains",
"name": "edr_contains_domain_links",
"severity": 1
},
{
"description": "Document file size is small",
"name": "edr_info_file_size_small",
"severity": 1
},
{
"description": "Document has a small number of pages",
"name": "edr_info_page_count_small",
"severity": 1
}
],
"analysis_type": "static",
"detection": {
"permalink": "https://www.virustotal.com/gui/file/f6edcd8a1b4f7cb85486d0c6777f9174eadbc4d1d0d9e5aeba7132f30b34bc3e/detection/f-f6edcd8a1b4f7cb85486d0c6777f9174eadbc4d1d0d9e5aeba7132f30b34bc3e-1656684162",
"positives": 0,
"sophos": "",
"sophos_ml": "",
"total": 59
},
"document_analysis": {
"meta_data": {
"author": "Yukon Department of Education",
"bytes": 20597,
"content_type": "PDF",
"encryption": "Standard V2.3 (128-bit)",
"language": "EN-US",
"last_saved_time": "2008-06-04T15:47:36Z",
"num_pages": 1,
"title": "PDF Test Page",
"version": 1.6
}
},
"linked_with_dynamic_analysis": false,
"ml_aggregate_results": {
"overall_score": 30
},
"ml_file": {
"analyses": {
"black_box": {
"benign": {
"raw": 0.39815810322761536,
"score": 30
},
"model_name": "dsml_model_pdf",
"model_version": "20211118"
},
"feature_intersections": [
{
"benign": 7120629,
"benign_fraction": 0.7120629263895423,
"category": "severity=1",
"description": "Feature NOT Observed: Document file size is large",
"indicator": "Feature NOT Observed: Document file size is large --> severity=1",
"malware": 9997092,
"malware_fraction": 0.9997092138044599,
"probability": 0.5840200283264096,
"scale_factor": 10000000
},
{
"benign": 617857,
"benign_fraction": 0.06178572053380388,
"category": "severity=1",
"description": "Feature Observed: Document has a small number of pages",
"indicator": "Feature Observed: Document has a small number of pages --> severity=1",
"malware": 5394909,
"malware_fraction": 0.5394909111791909,
"probability": 0.8972424383801834,
"scale_factor": 10000000
},
{
"benign": 537367,
"benign_fraction": 0.0537367720738131,
"category": "severity=2",
"description": "Feature NOT Observed: Document contains behaviour that executes on open",
"indicator": "Feature NOT Observed: Document contains behaviour that executes on open --> severity=2",
"malware": 5372856,
"malware_fraction": 0.5372856378987031,
"probability": 0.9090782833830074,
"scale_factor": 10000000
},
{
"benign": 536021,
"benign_fraction": 0.0536021766615527,
"category": "severity=2",
"description": "Feature NOT Observed: Document contains javascript",
"indicator": "Feature NOT Observed: Document contains javascript --> severity=2",
"malware": 5371043,
"malware_fraction": 0.5371043501402938,
"probability": 0.9092575175159124,
"scale_factor": 10000000
},
{
"benign": 509534,
"benign_fraction": 0.050953467474390626,
"category": "severity=1",
"description": "Feature NOT Observed: Document is possibly a phishing PDF",
"indicator": "Feature NOT Observed: Document is possibly a phishing PDF --> severity=1",
"malware": 5364929,
"malware_fraction": 0.5364929575695063,
"probability": 0.9132627839711798,
"scale_factor": 10000000
},
{
"benign": 428577,
"benign_fraction": 0.04285774978628207,
"category": "severity=2",
"description": "Feature NOT Observed: Field contains potentially suspicious content",
"indicator": "Feature NOT Observed: Field contains potentially suspicious content --> severity=2",
"malware": 5364293,
"malware_fraction": 0.5364293727421457,
"probability": 0.9260163947729065,
"scale_factor": 10000000
}
],
"feature_maliciousness": {
"Document contains links to external domains --> severity=1": {
"benign": 1828421,
"benign_fraction": 0.18284217995357024,
"category": "severity=1",
"description": "Document contains links to external domains",
"indicator": "Document contains links to external domains --> severity=1",
"malware": 9869267,
"malware_fraction": 0.9869267695627242,
"probability": 0.8436937653122213,
"scale_factor": 10000000
},
"Document file size is small --> severity=1": {
"benign": 7120119,
"benign_fraction": 0.7120119817558322,
"category": "severity=1",
"description": "Document file size is small",
"indicator": "Document file size is small --> severity=1",
"malware": 9899084,
"malware_fraction": 0.9899084471664678,
"probability": 0.581642026468478,
"scale_factor": 10000000
},
"Document has a small number of pages --> severity=1": {
"benign": 4086919,
"benign_fraction": 0.40869198927301453,
"category": "severity=1",
"description": "Document has a small number of pages",
"indicator": "Document has a small number of pages --> severity=1",
"malware": 5416129,
"malware_fraction": 0.5416129670799382,
"probability": 0.569935959461394,
"scale_factor": 10000000
}
},
"genetic_analysis": {
"neighbor_info": {
"1f1006182c2e9b6e2b09b07f9be9e122fdc1e681577af68984ab63a076a15fed": {
"filepath": "1f1006182c2e9b6e2b09b07f9be9e122fdc1e681577af68984ab63a076a15fed",
"is_malware": false,
"match_percentage": 0.25,
"score": 66.06397
},
"672cfdffbc33f07c0ad65633cbf610c5ec4bb7787c72d84a5460266aaa9a2dfa": {
"filepath": "672cfdffbc33f07c0ad65633cbf610c5ec4bb7787c72d84a5460266aaa9a2dfa",
"is_malware": false,
"match_percentage": 0.21875,
"score": 62.829075
},
"6cdde8eee67aa38917dfa4249f91381ffa983f2ff95a84d0f6076a4ddecf3de8": {
"filepath": "6cdde8eee67aa38917dfa4249f91381ffa983f2ff95a84d0f6076a4ddecf3de8",
"is_malware": false,
"match_percentage": 0.21875,
"score": 63.53914
},
"9a0d27944893e40316037fd47fb4d9836c1518705b1baa4a0ebf0fe34b045c00": {
"filepath": "9a0d27944893e40316037fd47fb4d9836c1518705b1baa4a0ebf0fe34b045c00",
"is_malware": false,
"match_percentage": 0.1875,
"score": 58.78177
},
"a881bffc0893ae55112a9370f9cf693c3893d672b96c2160e341d9f20d47cd2f": {
"filepath": "a881bffc0893ae55112a9370f9cf693c3893d672b96c2160e341d9f20d47cd2f",
"is_malware": false,
"match_percentage": 0.8125,
"score": 234.79837
},
"add263021a636c93d1fd6f9d7ac880ac8afaacc917dca01dbb66d388c71d1e6c": {
"filepath": "add263021a636c93d1fd6f9d7ac880ac8afaacc917dca01dbb66d388c71d1e6c",
"is_malware": false,
"match_percentage": 0.1875,
"score": 59.46551
}
},
"neighbor_matrix": {
"1f1006182c2e9b6e2b09b07f9be9e122fdc1e681577af68984ab63a076a15fed": {
"0_6659": false,
"10_9152": false,
"11_4861": false,
"12_5543": false,
"13_3732": false,
"14_5431": false,
"15_5899": false,
"16_1078": false,
"17_2637": true,
"18_6885": false,
"19_8710": false,
"1_7974": false,
"20_6372": true,
"21_7672": false,
"22_8447": false,
"23_5023": false,
"24_7353": false,
"25_4809": false,
"26_7069": true,
"27_5993": false,
"28_2717": true,
"29_2739": true,
"2_7985": true,
"30_7482": true,
"31_5233": false,
"3_7524": false,
"4_6424": true,
"5_110": false,
"6_8324": false,
"7_6214": false,
"8_7332": false,
"9_8770": false
},
"672cfdffbc33f07c0ad65633cbf610c5ec4bb7787c72d84a5460266aaa9a2dfa": {
"0_6659": false,
"10_9152": false,
"11_4861": false,
"12_5543": false,
"13_3732": false,
"14_5431": false,
"15_5899": false,
"16_1078": false,
"17_2637": true,
"18_6885": false,
"19_8710": false,
"1_7974": true,
"20_6372": true,
"21_7672": false,
"22_8447": false,
"23_5023": false,
"24_7353": false,
"25_4809": true,
"26_7069": false,
"27_5993": false,
"28_2717": true,
"29_2739": false,
"2_7985": false,
"30_7482": false,
"31_5233": false,
"3_7524": false,
"4_6424": true,
"5_110": false,
"6_8324": false,
"7_6214": false,
"8_7332": false,
"9_8770": true
},
"6cdde8eee67aa38917dfa4249f91381ffa983f2ff95a84d0f6076a4ddecf3de8": {
"0_6659": false,
"10_9152": false,
"11_4861": false,
"12_5543": false,
"13_3732": false,
"14_5431": false,
"15_5899": false,
"16_1078": true,
"17_2637": false,
"18_6885": false,
"19_8710": false,
"1_7974": false,
"20_6372": true,
"21_7672": false,
"22_8447": false,
"23_5023": false,
"24_7353": false,
"25_4809": false,
"26_7069": true,
"27_5993": false,
"28_2717": true,
"29_2739": false,
"2_7985": true,
"30_7482": true,
"31_5233": false,
"3_7524": false,
"4_6424": true,
"5_110": false,
"6_8324": false,
"7_6214": false,
"8_7332": false,
"9_8770": false
},
"9a0d27944893e40316037fd47fb4d9836c1518705b1baa4a0ebf0fe34b045c00": {
"0_6659": false,
"10_9152": false,
"11_4861": false,
"12_5543": false,
"13_3732": true,
"14_5431": false,
"15_5899": false,
"16_1078": true,
"17_2637": false,
"18_6885": true,
"19_8710": false,
"1_7974": true,
"20_6372": false,
"21_7672": false,
"22_8447": false,
"23_5023": false,
"24_7353": true,
"25_4809": false,
"26_7069": false,
"27_5993": false,
"28_2717": true,
"29_2739": false,
"2_7985": false,
"30_7482": false,
"31_5233": false,
"3_7524": false,
"4_6424": false,
"5_110": false,
"6_8324": false,
"7_6214": false,
"8_7332": false,
"9_8770": false
},
"a881bffc0893ae55112a9370f9cf693c3893d672b96c2160e341d9f20d47cd2f": {
"0_6659": true,
"10_9152": true,
"11_4861": true,
"12_5543": true,
"13_3732": true,
"14_5431": true,
"15_5899": true,
"16_1078": true,
"17_2637": true,
"18_6885": false,
"19_8710": true,
"1_7974": false,
"20_6372": true,
"21_7672": true,
"22_8447": false,
"23_5023": true,
"24_7353": true,
"25_4809": true,
"26_7069": true,
"27_5993": true,
"28_2717": true,
"29_2739": true,
"2_7985": true,
"30_7482": true,
"31_5233": false,
"3_7524": true,
"4_6424": true,
"5_110": false,
"6_8324": true,
"7_6214": false,
"8_7332": true,
"9_8770": true
},
"add263021a636c93d1fd6f9d7ac880ac8afaacc917dca01dbb66d388c71d1e6c": {
"0_6659": false,
"10_9152": false,
"11_4861": false,
"12_5543": false,
"13_3732": false,
"14_5431": false,
"15_5899": false,
"16_1078": false,
"17_2637": false,
"18_6885": false,
"19_8710": false,
"1_7974": false,
"20_6372": true,
"21_7672": false,
"22_8447": false,
"23_5023": false,
"24_7353": false,
"25_4809": true,
"26_7069": false,
"27_5993": false,
"28_2717": true,
"29_2739": false,
"2_7985": true,
"30_7482": true,
"31_5233": false,
"3_7524": false,
"4_6424": true,
"5_110": false,
"6_8324": false,
"7_6214": false,
"8_7332": false,
"9_8770": false
}
}
}
},
"analyzed_counts": {
"black_box": {
"benign": 0,
"malware": 0
},
"feature_intersections": {
"benign": 2798922,
"malware": 6340055
},
"feature_maliciousness": {
"benign": 2798922,
"malware": 6340055
},
"genetic_analysis": {
"benign": 7701633,
"malware": 2298367
}
},
"overall_score": 30,
"overall_scores": {
"black_box": 30,
"feature_intersections": 15,
"feature_maliciousness": 15,
"genetic_analysis": 13
}
},
"ml_filepath": {
"analyses": {
"neighbor_maliciousness": {
"most_similar": [],
"most_similar_benign": [],
"most_similar_malware": []
}
},
"analyzed_counts": {
"neighbor_maliciousness": {
"benign": -1,
"malware": -1
}
},
"overall_score": -1,
"overall_scores": {
"neighbor_maliciousness": -1
}
},
"ml_inputs": {
"filepath": null
},
"object_type": "file",
"reputation": {
"first_seen": "2022-02-08T19:28:46",
"last_seen": "2022-07-04T07:46:43",
"prevalence": "Popular",
"score": 62,
"score_string": "Prevalent"
},
"schema_version": "1.1.0",
"score": 10,
"submission": "2022-07-04T08:43:34Z",
"target": {
"file_name": "pdf-test.pdf",
"mime_type": "application/pdf",
"object_id": "f6edcd8a1b4f7cb85486d0c6777f9174eadbc4d1d0d9e5aeba7132f30b34bc3e",
"sha1": "5b03ccec77b416805d6d8e270d33942aaedcc6dd",
"sha256": "f6edcd8a1b4f7cb85486d0c6777f9174eadbc4d1d0d9e5aeba7132f30b34bc3e"
}
},
"requestId": "68db2f66-c63e-4a04-93f9-7067231e42e1"
}
There are a couple of interesting points in your question. Let's start with the scoring.
You are correct that the API documentation is not entirely accurate. A score below 20 is malicious and a score above 70 is clean. You can see a sample implementation of processing the scores around line 139 here.
In the case of the report that you provided, the ML analyzers are causing the file to be convicted. From the report, it looks like the following file features (which are commonly seen in malicious files) are causing the ML model to believe the file is malicious:
- Document contains links to external domains
- Document file size is small
- Document has a small number of pages
Looking at the dynamic analysis results, the information from VirusTotal, etc., this could be a false positive and should be escalated to Sophos. The escalation path for false positives and false negatives (FPs/FNs) is here:
https://support.sophos.com/support/s/filesubmission?language=en_US