I am working on a simple python script to extract certain data from an xml file. The xml contains windows events and eventid. Below I am showing the code. It is failing when it needs to extract the data, but it is creating the file but is empty.
from xml.etree import ElementTree as ET
import csv
tree = ET.parse("SecurityLog-rev2.xml")
root = tree.getroot()
url = root[0].tag[:-len("Event")]
fieldnames = ['EventID']
with open ('event_log.csv', 'w') as csvfile:
writecsv = csv.DictWriter(csvfile, fieldnames = fieldnames)
writecsv.writeheader()
for event in root:
system = event.find(url + "System")
output = {}
fields = ['EventID']
# for tag,att in fields:
# output[tag] = system.find(url + tag).attrib[att]
if event.find(url + "EventData") != None:
for data in event.find(url + "EventData"):
name = data.attrib['Name']
output[name] = data.text
writecsv.writerow(output)
<Event xmlns='http://schemas.microsoft.com/win/2004/08/events/event'><System><Provider Name='Microsoft-Windows-Security-Auditing' Guid='{54849625-5478-4994-A5BA-3E3B0328C30D}'/>
<EventID>4634</EventID>
<Version>0</Version><Level>0</Level><Task>12545</Task><Opcode>0</Opcode><Keywords>0x8020000000000000</Keywords><TimeCreated SystemTime='2011-04-16T15:07:53.890625000Z'/>
<EventRecordID>1410962</EventRecordID><Correlation/><Execution ProcessID='452' ThreadID='3900'/><Channel>Security</Channel><Computer>DC01.AFC.com</Computer><Security/></System>
<EventData><Data Name='TargetUserSid'>S-1-5-21-2795111079-3225111112-3329435632-1610</Data>
<Data Name='TargetUserName'>grant.larson</Data>
<Data Name='TargetDomainName'>AFC</Data><Data Name='TargetLogonId'>0x3642df8</Data><Data Name='LogonType'>3</Data></EventData></Event>
I am not sure what exactly you would parse. Here is a solution for the Id and the events:
Your XML File provided above as Input:
<?xml version="1.0" encoding="utf-8"?>
<Event xmlns='http://schemas.microsoft.com/win/2004/08/events/event'>
<System>
<Provider Name='Microsoft-Windows-Security-Auditing' Guid='{54849625-5478-4994-A5BA-3E3B0328C30D}' />
<EventID>4634</EventID>
<Version>0</Version>
<Level>0</Level>
<Task>12545</Task>
<Opcode>0</Opcode>
<Keywords>0x8020000000000000</Keywords>
<TimeCreated SystemTime='2011-04-16T15:07:53.890625000Z' />
<EventRecordID>1410962</EventRecordID>
<Correlation />
<Execution ProcessID='452' ThreadID='3900' />
<Channel>Security</Channel>
<Computer>DC01.AFC.com</Computer>
<Security />
</System>
<EventData>
<Data Name='TargetUserSid'>S-1-5-21-2795111079-3225111112-3329435632-1610</Data>
<Data Name='TargetUserName'>grant.larson</Data>
<Data Name='TargetDomainName'>AFC</Data>
<Data Name='TargetLogonId'>0x3642df8</Data>
<Data Name='LogonType'>3</Data>
</EventData>
</Event>
The program code without regex for catching the namespace:
from xml.etree import ElementTree as ET
import pandas as pd
import csv
tree = ET.parse("SecurityLog-rev2.xml")
root = tree.getroot()
ns = "{http://schemas.microsoft.com/win/2004/08/events/event}"
data = []
for eventID in root.findall(".//"):
if eventID.tag == f"{ns}System":
for e_id in eventID.iter():
if e_id.tag == f'{ns}EventID':
row = "EventID", e_id.text
data.append(row)
if eventID.tag == f"{ns}EventData":
for attr in eventID.iter():
if attr.tag == f'{ns}Data':
#print(attr.attrib)
row = attr.get('Name'), attr.text
data.append(row)
df = pd.DataFrame.from_dict(data, orient='columns')
df.to_csv('event_log.csv', index=False, header=False)
print(df)
Output:
0 1
0 EventID 4634
1 TargetUserSid S-1-5-21-2795111079-3225111112-3329435632-1610
2 TargetUserName grant.larson
3 TargetDomainName AFC
4 TargetLogonId 0x3642df8
5 LogonType 3
The CSV File doesn't contain the index and header:
EventID,4634
TargetUserSid,S-1-5-21-2795111079-3225111112-3329435632-1610
TargetUserName,grant.larson
TargetDomainName,AFC
TargetLogonId,0x3642df8
LogonType,3
You can tanspose() the output:
df.T.to_csv('event_log.csv', index=False, header=False)