I am getting the following exception:
XmlException: Unexpected node type EntityReference. ReadElementString method can only be called on elements with simple or empty content.
It is thrown by this simple bit of code:
// Failing attempt from the question: the StreamReader is passed straight to
// XmlSerializer.Deserialize, so no XmlReaderSettings are supplied and the
// entity reference in <pos>&n;</pos> surfaces as an EntityReference node.
using (StreamReader reader = new StreamReader(filePath, Encoding.UTF8))
{
    XmlSerializer serializer = new XmlSerializer(typeof(entry[]), new XmlRootAttribute("JMdict"));
    return (entry[])serializer.Deserialize(reader);
}
I used xsd.exe to generate my C# classes from the xsd file. (Note: the code below is truncated because of the body length limit, but I think the essential parts needed to solve the problem are there.)
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
using System.Xml.Serialization;
// This source code was auto-generated by xsd, Version=4.7.2046.0.
/// <summary>
/// Root <c>JMdict</c> element; per the DTD it contains zero or more <c>entry</c> children.
/// </summary>
[System.CodeDom.Compiler.GeneratedCodeAttribute("xsd", "4.7.2046.0")]
[System.Xml.Serialization.XmlRoot("JMdict", IsNullable = false)]
public partial class JMdict
{
    private entry[] entryField;

    /// <summary>The repeated <c>entry</c> children of the root element.</summary>
    // XmlElement makes each <entry> bind directly as a child of <JMdict>;
    // without it XmlSerializer expects an extra wrapper element around the array items.
    [System.Xml.Serialization.XmlElementAttribute("entry")]
    public entry[] entry
    {
        get { return this.entryField; }
        set { this.entryField = value; }
    }
}
/// <summary>
/// One <c>entry</c> element. The DTD declares its content as
/// <c>(ent_seq, k_ele*, r_ele+, sense+)</c>.
/// </summary>
[System.CodeDom.Compiler.GeneratedCodeAttribute("xsd", "4.7.2046.0")]
public partial class entry
{
    private string ent_seqField;
    private k_ele[] k_eleField;
    private r_ele[] r_eleField;
    private sense[] senseField;

    /// <summary>Text content of the single <c>ent_seq</c> child.</summary>
    public string ent_seq
    {
        get { return this.ent_seqField; }
        set { this.ent_seqField = value; }
    }

    /// <summary>The repeated <c>k_ele</c> children (zero or more).</summary>
    // XmlElement on each array: the children repeat directly under <entry>
    // rather than being nested inside a wrapper element.
    [System.Xml.Serialization.XmlElementAttribute("k_ele")]
    public k_ele[] k_ele
    {
        get { return this.k_eleField; }
        set { this.k_eleField = value; }
    }

    /// <summary>The repeated <c>r_ele</c> children (one or more).</summary>
    [System.Xml.Serialization.XmlElementAttribute("r_ele")]
    public r_ele[] r_ele
    {
        get { return this.r_eleField; }
        set { this.r_eleField = value; }
    }

    /// <summary>The repeated <c>sense</c> children (one or more).</summary>
    [System.Xml.Serialization.XmlElementAttribute("sense")]
    public sense[] sense
    {
        get { return this.senseField; }
        set { this.senseField = value; }
    }
}
/// <summary>
/// One <c>k_ele</c> element. The DTD declares its content as
/// <c>(keb, ke_inf*, ke_pri*)</c>.
/// </summary>
[System.CodeDom.Compiler.GeneratedCodeAttribute("xsd", "4.7.2046.0")]
public partial class k_ele
{
    private string kebField;
    private string[] ke_infField;
    private string[] ke_priField;

    /// <summary>Text content of the single <c>keb</c> child.</summary>
    public string keb
    {
        get { return this.kebField; }
        set { this.kebField = value; }
    }

    /// <summary>Text content of each repeated <c>ke_inf</c> child.</summary>
    // XmlElement binds the repeated children directly; a bare string[] would
    // otherwise be read as <ke_inf><string>..</string></ke_inf>.
    [System.Xml.Serialization.XmlElementAttribute("ke_inf")]
    public string[] ke_inf
    {
        get { return this.ke_infField; }
        set { this.ke_infField = value; }
    }

    /// <summary>Text content of each repeated <c>ke_pri</c> child.</summary>
    [System.Xml.Serialization.XmlElementAttribute("ke_pri")]
    public string[] ke_pri
    {
        get { return this.ke_priField; }
        set { this.ke_priField = value; }
    }
}
/// <summary>
/// One <c>r_ele</c> element. The DTD declares its content as
/// <c>(reb, re_nokanji?, re_restr*, re_inf*, re_pri*)</c>.
/// </summary>
[System.CodeDom.Compiler.GeneratedCodeAttribute("xsd", "4.7.2046.0")]
public partial class r_ele
{
    private string rebField;
    private string re_nokanjiField;
    private string[] re_restrField;
    private string[] re_infField;
    private string[] re_priField;

    /// <summary>Text content of the single <c>reb</c> child.</summary>
    public string reb
    {
        get { return this.rebField; }
        set { this.rebField = value; }
    }

    /// <summary>Text content of the optional <c>re_nokanji</c> child; null when absent.</summary>
    public string re_nokanji
    {
        get { return this.re_nokanjiField; }
        set { this.re_nokanjiField = value; }
    }

    /// <summary>Text content of each repeated <c>re_restr</c> child.</summary>
    // XmlElement on each array: the children repeat directly under <r_ele>
    // rather than being nested inside a wrapper element.
    [System.Xml.Serialization.XmlElementAttribute("re_restr")]
    public string[] re_restr
    {
        get { return this.re_restrField; }
        set { this.re_restrField = value; }
    }

    /// <summary>Text content of each repeated <c>re_inf</c> child.</summary>
    [System.Xml.Serialization.XmlElementAttribute("re_inf")]
    public string[] re_inf
    {
        get { return this.re_infField; }
        set { this.re_infField = value; }
    }

    /// <summary>Text content of each repeated <c>re_pri</c> child.</summary>
    [System.Xml.Serialization.XmlElementAttribute("re_pri")]
    public string[] re_pri
    {
        get { return this.re_priField; }
        set { this.re_priField = value; }
    }
}
/// <summary>
/// One <c>sense</c> element. The DTD declares its content as
/// <c>(stagk*, stagr*, pos*, xref*, ant*, field*, misc*, s_inf*, lsource*, dial*, gloss*)</c>,
/// so every member is a repeated direct child and carries an XmlElement attribute
/// to bind without a wrapper element.
/// </summary>
[System.CodeDom.Compiler.GeneratedCodeAttribute("xsd", "4.7.2046.0")]
public partial class sense
{
    private string[] stagkField;
    private string[] stagrField;
    private string[] posField;
    private string[] xrefField;
    private string[] antField;
    private string[] fieldField;
    private string[] miscField;
    private string[] s_infField;
    private lsource[] lsourceField;
    private string[] dialField;
    private gloss[] glossField;

    /// <summary>Text content of each repeated <c>stagk</c> child.</summary>
    [System.Xml.Serialization.XmlElementAttribute("stagk")]
    public string[] stagk
    {
        get { return this.stagkField; }
        set { this.stagkField = value; }
    }

    /// <summary>Text content of each repeated <c>stagr</c> child.</summary>
    [System.Xml.Serialization.XmlElementAttribute("stagr")]
    public string[] stagr
    {
        get { return this.stagrField; }
        set { this.stagrField = value; }
    }

    /// <summary>
    /// Text content of each repeated <c>pos</c> child. In the source document these
    /// are written as entity references (e.g. <c>&amp;n;</c>), so the reader must
    /// expand DTD entities for them to deserialize.
    /// </summary>
    [System.Xml.Serialization.XmlElementAttribute("pos")]
    public string[] pos
    {
        get { return this.posField; }
        set { this.posField = value; }
    }

    /// <summary>Text content of each repeated <c>xref</c> child.</summary>
    [System.Xml.Serialization.XmlElementAttribute("xref")]
    public string[] xref
    {
        get { return this.xrefField; }
        set { this.xrefField = value; }
    }

    /// <summary>Text content of each repeated <c>ant</c> child.</summary>
    [System.Xml.Serialization.XmlElementAttribute("ant")]
    public string[] ant
    {
        get { return this.antField; }
        set { this.antField = value; }
    }

    /// <summary>Text content of each repeated <c>field</c> child.</summary>
    [System.Xml.Serialization.XmlElementAttribute("field")]
    public string[] field
    {
        get { return this.fieldField; }
        set { this.fieldField = value; }
    }

    /// <summary>Text content of each repeated <c>misc</c> child.</summary>
    [System.Xml.Serialization.XmlElementAttribute("misc")]
    public string[] misc
    {
        get { return this.miscField; }
        set { this.miscField = value; }
    }

    /// <summary>Text content of each repeated <c>s_inf</c> child.</summary>
    [System.Xml.Serialization.XmlElementAttribute("s_inf")]
    public string[] s_inf
    {
        get { return this.s_infField; }
        set { this.s_infField = value; }
    }

    /// <summary>The repeated <c>lsource</c> children.</summary>
    [System.Xml.Serialization.XmlElementAttribute("lsource")]
    public lsource[] lsource
    {
        get { return this.lsourceField; }
        set { this.lsourceField = value; }
    }

    /// <summary>Text content of each repeated <c>dial</c> child.</summary>
    [System.Xml.Serialization.XmlElementAttribute("dial")]
    public string[] dial
    {
        get { return this.dialField; }
        set { this.dialField = value; }
    }

    /// <summary>The repeated <c>gloss</c> children.</summary>
    [System.Xml.Serialization.XmlElementAttribute("gloss")]
    public gloss[] gloss
    {
        get { return this.glossField; }
        set { this.glossField = value; }
    }
}
Here is the start of the xml file I am trying to deserialize:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE JMdict [
<!ELEMENT JMdict (entry*)>
<!ELEMENT entry (ent_seq,k_ele*,r_ele+,sense+)>
<!ELEMENT ent_seq (#PCDATA)>
<!ELEMENT k_ele (keb,ke_inf*,ke_pri*)>
<!ELEMENT ke_inf (#PCDATA)>
<!ELEMENT ke_pri (#PCDATA)>
<!ELEMENT r_ele (reb,re_nokanji?,re_restr*,re_inf*,re_pri*)>
<!ELEMENT re_nokanji (#PCDATA)>
<!ELEMENT re_restr (#PCDATA)>
<!ELEMENT re_inf (#PCDATA)>
<!ELEMENT re_pri (#PCDATA)>
<!ELEMENT sense (stagk*,stagr*,pos*,xref*,ant*,field*,misc*,s_inf*,lsource*,dial*,gloss*)>
<!ELEMENT stagk (#PCDATA)>
<!ELEMENT stagr (#PCDATA)>
<!ELEMENT xref (#PCDATA)*>
<!ELEMENT field (#PCDATA)>
<!ELEMENT lsource (#PCDATA)>
<!ATTLIST lsource xml:lang CDATA "eng">
<!ATTLIST lsource ls_type CDATA #IMPLIED>
<!ATTLIST lsource ls_wasei CDATA #IMPLIED>
<!ELEMENT gloss (#PCDATA|pri)*>
<!ATTLIST gloss xml:lang CDATA "eng">
<!ATTLIST gloss g_gend CDATA #IMPLIED>
<!ELEMENT s_inf (#PCDATA)>
<!ENTITY MA "martial arts term">
<!ENTITY X "rude or X-rated term (not displayed in educational software)">
<!ENTITY abbr "abbreviation">
<!ENTITY adj-i "adjective (keiyoushi)">
<!ENTITY adj-ix "adjective (keiyoushi) - yoi/ii class">
<!ENTITY adj-na "adjectival nouns or quasi-adjectives (keiyodoshi)">
<!ENTITY adj-no "nouns which may take the genitive case particle `no'">
<!ENTITY adj-pn "pre-noun adjectival (rentaishi)">
<!ENTITY adj-t "`taru' adjective">
<!ENTITY adj-f "noun or verb acting prenominally">
<!ENTITY adv "adverb (fukushi)">
<!ENTITY adv-to "adverb taking the `to' particle">
<!ENTITY arch "archaism">
<!ENTITY ateji "ateji (phonetic) reading">
<!ENTITY aux "auxiliary">
<!ENTITY aux-v "auxiliary verb">
<!ENTITY aux-adj "auxiliary adjective">
<!ENTITY Buddh "Buddhist term">
<!ENTITY chem "chemistry term">
<!ENTITY chn "children's language">
<!ENTITY col "colloquialism">
<!ENTITY comp "computer terminology">
<!ENTITY conj "conjunction">
<!ENTITY cop-da "copula">
<!ENTITY ctr "counter">
<!ENTITY derog "derogatory">
<!ENTITY eK "exclusively kanji">
<!ENTITY ek "exclusively kana">
<!ENTITY exp "expressions (phrases, clauses, etc.)">
<!ENTITY fam "familiar language">
<!ENTITY fem "female term or language">
<!ENTITY food "food term">
<!ENTITY geom "geometry term">
<!ENTITY gikun "gikun (meaning as reading) or jukujikun (special kanji reading)">
<!ENTITY hon "honorific or respectful (sonkeigo) language">
<!ENTITY hum "humble (kenjougo) language">
<!ENTITY iK "word containing irregular kanji usage">
<!ENTITY id "idiomatic expression">
<!ENTITY ik "word containing irregular kana usage">
<!ENTITY int "interjection (kandoushi)">
<!ENTITY io "irregular okurigana usage">
<!ENTITY iv "irregular verb">
<!ENTITY ling "linguistics terminology">
<!ENTITY m-sl "manga slang">
<!ENTITY male "male term or language">
<!ENTITY male-sl "male slang">
<!ENTITY math "mathematics">
<!ENTITY mil "military">
<!ENTITY n "noun (common) (futsuumeishi)">
<!ENTITY n-adv "adverbial noun (fukushitekimeishi)">
<!ENTITY n-suf "noun, used as a suffix">
<!ENTITY n-pref "noun, used as a prefix">
<!ENTITY n-t "noun (temporal) (jisoumeishi)">
<!ENTITY num "numeric">
<!ENTITY oK "word containing out-dated kanji">
<!ENTITY obs "obsolete term">
<!ENTITY obsc "obscure term">
<!ENTITY ok "out-dated or obsolete kana usage">
<!ENTITY oik "old or irregular kana form">
<!ENTITY on-mim "onomatopoeic or mimetic word">
<!ENTITY pn "pronoun">
<!ENTITY poet "poetical term">
<!ENTITY pol "polite (teineigo) language">
<!ENTITY pref "prefix">
<!ENTITY proverb "proverb">
<!ENTITY prt "particle">
<!ENTITY physics "physics terminology">
<!ENTITY rare "rare">
<!ENTITY sens "sensitive">
<!ENTITY sl "slang">
<!ENTITY suf "suffix">
<!ENTITY uK "word usually written using kanji alone">
<!ENTITY uk "word usually written using kana alone">
<!ENTITY unc "unclassified">
<!ENTITY yoji "yojijukugo">
<!ENTITY v1 "Ichidan verb">
<!ENTITY v1-s "Ichidan verb - kureru special class">
<!ENTITY v2a-s "Nidan verb with 'u' ending (archaic)">
<!ENTITY v4h "Yodan verb with `hu/fu' ending (archaic)">
<!ENTITY v4r "Yodan verb with `ru' ending (archaic)">
<!ENTITY v5aru "Godan verb - -aru special class">
<!ENTITY v5b "Godan verb with `bu' ending">
<!ENTITY v5g "Godan verb with `gu' ending">
<!ENTITY v5k "Godan verb with `ku' ending">
<!ENTITY v5k-s "Godan verb - Iku/Yuku special class">
<!ENTITY v5m "Godan verb with `mu' ending">
<!ENTITY v5n "Godan verb with `nu' ending">
<!ENTITY v5r "Godan verb with `ru' ending">
<!ENTITY v5r-i "Godan verb with `ru' ending (irregular verb)">
<!ENTITY v5s "Godan verb with `su' ending">
<!ENTITY v5t "Godan verb with `tsu' ending">
<!ENTITY v5u "Godan verb with `u' ending">
<!ENTITY v5u-s "Godan verb with `u' ending (special class)">
<!ENTITY v5uru "Godan verb - Uru old class verb (old form of Eru)">
<!ENTITY vz "Ichidan verb - zuru verb (alternative form of -jiru verbs)">
<!ENTITY vi "intransitive verb">
<!ENTITY vk "Kuru verb - special class">
<!ENTITY vn "irregular nu verb">
<!ENTITY vr "irregular ru verb, plain form ends with -ri">
<!ENTITY vs "noun or participle which takes the aux. verb suru">
<!ENTITY vs-c "su verb - precursor to the modern suru">
<!ENTITY vs-s "suru verb - special class">
<!ENTITY vs-i "suru verb - irregular">
<!ENTITY kyb "Kyoto-ben">
<!ENTITY osb "Osaka-ben">
<!ENTITY ksb "Kansai-ben">
<!ENTITY ktb "Kantou-ben">
<!ENTITY tsb "Tosa-ben">
<!ENTITY thb "Touhoku-ben">
<!ENTITY tsug "Tsugaru-ben">
<!ENTITY kyu "Kyuushuu-ben">
<!ENTITY rkb "Ryuukyuu-ben">
<!ENTITY nab "Nagano-ben">
<!ENTITY hob "Hokkaido-ben">
<!ENTITY vt "transitive verb">
<!ENTITY vulg "vulgar expression or word">
<!ENTITY adj-kari "`kari' adjective (archaic)">
<!ENTITY adj-ku "`ku' adjective (archaic)">
<!ENTITY adj-shiku "`shiku' adjective (archaic)">
<!ENTITY adj-nari "archaic/formal form of na-adjective">
<!ENTITY n-pr "proper noun">
<!ENTITY v-unspec "verb unspecified">
<!ENTITY v4k "Yodan verb with `ku' ending (archaic)">
<!ENTITY v4g "Yodan verb with `gu' ending (archaic)">
<!ENTITY v4s "Yodan verb with `su' ending (archaic)">
<!ENTITY v4t "Yodan verb with `tsu' ending (archaic)">
<!ENTITY v4n "Yodan verb with `nu' ending (archaic)">
<!ENTITY v4b "Yodan verb with `bu' ending (archaic)">
<!ENTITY v4m "Yodan verb with `mu' ending (archaic)">
<!ENTITY v2k-k "Nidan verb (upper class) with `ku' ending (archaic)">
<!ENTITY v2g-k "Nidan verb (upper class) with `gu' ending (archaic)">
<!ENTITY v2t-k "Nidan verb (upper class) with `tsu' ending (archaic)">
<!ENTITY v2d-k "Nidan verb (upper class) with `dzu' ending (archaic)">
<!ENTITY v2h-k "Nidan verb (upper class) with `hu/fu' ending (archaic)">
<!ENTITY v2b-k "Nidan verb (upper class) with `bu' ending (archaic)">
<!ENTITY v2m-k "Nidan verb (upper class) with `mu' ending (archaic)">
<!ENTITY v2y-k "Nidan verb (upper class) with `yu' ending (archaic)">
<!ENTITY v2r-k "Nidan verb (upper class) with `ru' ending (archaic)">
<!ENTITY v2k-s "Nidan verb (lower class) with `ku' ending (archaic)">
<!ENTITY v2g-s "Nidan verb (lower class) with `gu' ending (archaic)">
<!ENTITY v2s-s "Nidan verb (lower class) with `su' ending (archaic)">
<!ENTITY v2z-s "Nidan verb (lower class) with `zu' ending (archaic)">
<!ENTITY v2t-s "Nidan verb (lower class) with `tsu' ending (archaic)">
<!ENTITY v2d-s "Nidan verb (lower class) with `dzu' ending (archaic)">
<!ENTITY v2n-s "Nidan verb (lower class) with `nu' ending (archaic)">
<!ENTITY v2h-s "Nidan verb (lower class) with `hu/fu' ending (archaic)">
<!ENTITY v2b-s "Nidan verb (lower class) with `bu' ending (archaic)">
<!ENTITY v2m-s "Nidan verb (lower class) with `mu' ending (archaic)">
<!ENTITY v2y-s "Nidan verb (lower class) with `yu' ending (archaic)">
<!ENTITY v2r-s "Nidan verb (lower class) with `ru' ending (archaic)">
<!ENTITY v2w-s "Nidan verb (lower class) with `u' ending and `we' conjugation (archaic)">
<!ENTITY archit "architecture term">
<!ENTITY astron "astronomy, etc. term">
<!ENTITY baseb "baseball term">
<!ENTITY biol "biology term">
<!ENTITY bot "botany term">
<!ENTITY bus "business term">
<!ENTITY econ "economics term">
<!ENTITY engr "engineering term">
<!ENTITY finc "finance term">
<!ENTITY geol "geology, etc. term">
<!ENTITY law "law, etc. term">
<!ENTITY mahj "mahjong term">
<!ENTITY med "medicine, etc. term">
<!ENTITY music "music term">
<!ENTITY Shinto "Shinto term">
<!ENTITY shogi "shogi term">
<!ENTITY sports "sports term">
<!ENTITY sumo "sumo term">
<!ENTITY zool "zoology term">
<!ENTITY joc "jocular, humorous term">
<!ENTITY anat "anatomical term">
<gloss xml:lang="eng">repetition mark in katakana</gloss>
<gloss xml:lang="eng">voiced repetition mark in katakana</gloss>
The error line corresponds to the <pos>&n;</pos> element at the end, so it looks like the deserializer is unable to resolve the referenced entity (<!ENTITY n "noun (common) (futsuumeishi)">).
You need to tell XmlSerializer (or rather, the underlying XmlReader) that it is safe to expand XML entity references by setting XmlReaderSettings.DtdProcessing = DtdProcessing.Parse, like so:
// Built once and reused: constructing an XmlSerializer with an XmlRootAttribute override
// generates a fresh dynamic assembly on every call, and those assemblies are never unloaded.
private static readonly XmlSerializer s_entriesSerializer =
    new XmlSerializer(typeof(entry[]), new XmlRootAttribute("JMdict"));

/// <summary>
/// Reads the XML file at <paramref name="filePath"/> as UTF-8 and deserializes the
/// <c>entry</c> children of its <c>JMdict</c> root, expanding entity references
/// declared in the document's internal DTD.
/// </summary>
/// <param name="filePath">Path of the XML file to read.</param>
/// <returns>The deserialized entries.</returns>
static entry[] DeserializeEntries(string filePath)
{
    var settings = new XmlReaderSettings
    {
        // Allow processing of the DTD so entity references such as &n; are expanded.
        // (On .NET versions that predate DtdProcessing, set ProhibitDtd = false instead.)
        DtdProcessing = DtdProcessing.Parse,
        // For security, prevent DoS attacks by capping the total number of characters
        // that entity expansion may produce.
        MaxCharactersFromEntities = (long)1e7,
        // For security, disable resolution of entities from external documents.
        XmlResolver = null,
    };

    using (var reader = new StreamReader(filePath, Encoding.UTF8))
    using (var xmlReader = XmlReader.Create(reader, settings))
    {
        return (entry[])s_entriesSerializer.Deserialize(xmlReader);
    }
}
It's possible for an untrusted XML file to mount a denial-of-service attack by forcing an out-of-memory exception via cleverly crafted DTD entities and entity references (the classic example is the "billion laughs" attack). Setting XmlReaderSettings.MaxCharactersFromEntities to something reasonable should mitigate that.
The specific value used here, (long)1e7, was taken from the reference source for XElement.Load(). Modify it to fit your needs.
Similarly, setting XmlReaderSettings.XmlResolver = null prevents an untrusted XML file from generating unexpected requests to external resources.
If you are going to construct an XmlSerializer using an XmlRootAttribute override, you must statically cache the serializer to avoid a severe memory leak: each such construction generates a new dynamic assembly that is never unloaded.
Working .Net fiddle.