I am working with the response output from Zillow's API service in R; it appears to be in XML format, and I would like to obtain the text of a certain element in the response. However, when I attempt to parse the response using xmlTreeParse, an error occurs stating "XML content does not seem to be XML". How can I obtain the text of a certain element in the API response?
I am a bit of a novice with R, so any method or advice regarding how to convert this response or obtain an element's text would be awesome. I think the problem lies in the format of the API response.
library(ZillowR)
library(XML)
library(RCurl)
# Query Zillow's GetDeepSearchResults endpoint for a single address.
## one must have a "zws_id", Zillow's API key ##
reply <- GetDeepSearchResults(address = '33 Pratt Rd', citystatezip = 'Scituate, MA',
rentzestimate = FALSE, zws_id = "XXXXXX",
url = "http://www.zillow.com/webservice/GetDeepSearchResults.htm")
# Print the reply. Per the output shown below, it is a list with `request`,
# `message` and `response` elements rather than a single XML string.
reply
# This is the call reported to fail with "XML content does not seem to be XML":
# the whole `reply` list is passed with asText = TRUE.
# NOTE(review): presumably the error arises because asText = TRUE expects a
# character string of XML, not a list -- confirm against the XML package docs.
doc = xmlTreeParse(reply, asText = TRUE, useInternal = TRUE)
# Intended to read the text of the <amount> element from the parsed document.
xmlValue(doc[["//amount"]])
"reply" output
$request
$request$address
[1] "33 Pratt Rd"
$request$citystatezip
[1] "Scituate, MA"
$message
$message$text
[1] "Request successfully processed"
$message$code
[1] "0"
$response
<response>
<results>
<result>
<zpid>57223487</zpid>
<links>
<homedetails>http://www.zillow.com/homedetails/33-Pratt-Rd-Scituate-MA-02066/57223487_zpid/</homedetails>
<graphsanddata>http://www.zillow.com/homedetails/33-Pratt-Rd-Scituate-MA-02066/57223487_zpid/#charts-and-data</graphsanddata>
<mapthishome>http://www.zillow.com/homes/57223487_zpid/</mapthishome>
<comparables>http://www.zillow.com/homes/comps/57223487_zpid/</comparables>
</links>
<address>
<street>33 Pratt Rd</street>
<zipcode>02066</zipcode>
<city>Scituate</city>
<state>MA</state>
<latitude>42.211625</latitude>
<longitude>-70.775096</longitude>
</address>
<FIPScounty>25023</FIPScounty>
<useCode>SingleFamily</useCode>
<taxAssessmentYear>2019</taxAssessmentYear>
<taxAssessment>441300.0</taxAssessment>
<yearBuilt>1972</yearBuilt>
<lotSizeSqFt>21400</lotSizeSqFt>
<finishedSqFt>1363</finishedSqFt>
<bathrooms>1.0</bathrooms>
<bedrooms>3</bedrooms>
<totalRooms>6</totalRooms>
<zestimate>
<amount currency="USD">472759</amount>
<last-updated>06/16/2019</last-updated>
Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
Consider 'structure(list(), *)' instead. <oneWeekChange deprecated="true"/>
<valueChange duration="30" currency="USD">2131</valueChange>
<valuationRange>
<low currency="USD">449121</low>
<high currency="USD">501125</high>
</valuationRange>
<percentile>0</percentile>
</zestimate>
<localRealEstate>
<region name="Scituate" id="397319" type="city">
<zindexValue>551,000</zindexValue>
<links>
<overview>http://www.zillow.com/local-info/MA-Scituate/r_397319/</overview>
<forSaleByOwner>http://www.zillow.com/scituate-ma/fsbo/</forSaleByOwner>
<forSale>http://www.zillow.com/scituate-ma/</forSale>
</links>
</region>
</localRealEstate>
</result>
</results>
</response>
It is a bit extensive, but this is a text representation of the call/response output...
dput(reply)
list(request = list(address = "33 Pratt Rd", citystatezip = "Scituate, MA"),
message = list(text = "Request successfully processed", code = "0"),
response = structure(list(name = "response", attributes = NULL,
children = list(results = structure(list(name = "results",
attributes = NULL, children = list(result = structure(list(
name = "result", attributes = NULL, children = list(
zpid = structure(list(name = "zpid", attributes = NULL,
children = list(text = structure(list(name = "text",
attributes = NULL, children = NULL, namespace = NULL,
namespaceDefinitions = NULL, value = "57223487"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), links = structure(list(name = "links",
attributes = NULL, children = list(homedetails = structure(list(
name = "homedetails", attributes = NULL,
children = list(text = structure(list(name = "text",
attributes = NULL, children = NULL, namespace = NULL,
namespaceDefinitions = NULL, value = "http://www.zillow.com/homedetails/33-Pratt-Rd-Scituate-MA-02066/57223487_zpid/"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), graphsanddata = structure(list(name = "graphsanddata",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "http://www.zillow.com/homedetails/33-Pratt-Rd-Scituate-MA-02066/57223487_zpid/#charts-and-data"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), mapthishome = structure(list(name = "mapthishome",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "http://www.zillow.com/homes/57223487_zpid/"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), comparables = structure(list(name = "comparables",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "http://www.zillow.com/homes/comps/57223487_zpid/"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), address = structure(list(name = "address",
attributes = NULL, children = list(street = structure(list(
name = "street", attributes = NULL, children = list(
text = structure(list(name = "text",
attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "33 Pratt Rd"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), zipcode = structure(list(name = "zipcode",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "02066"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), city = structure(list(name = "city",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "Scituate"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), state = structure(list(name = "state",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "MA"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), latitude = structure(list(name = "latitude",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "42.211625"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), longitude = structure(list(name = "longitude",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "-70.775096"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), FIPScounty = structure(list(name = "FIPScounty",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "25023"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), useCode = structure(list(name = "useCode",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "SingleFamily"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), taxAssessmentYear = structure(list(name = "taxAssessmentYear",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "2019"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), taxAssessment = structure(list(name = "taxAssessment",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "441300.0"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), yearBuilt = structure(list(name = "yearBuilt",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "1972"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), lotSizeSqFt = structure(list(name = "lotSizeSqFt",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "21400"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), finishedSqFt = structure(list(name = "finishedSqFt",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "1363"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), bathrooms = structure(list(name = "bathrooms",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "1.0"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), bedrooms = structure(list(name = "bedrooms",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "3"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), totalRooms = structure(list(name = "totalRooms",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "6"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), zestimate = structure(list(name = "zestimate",
attributes = NULL, children = list(amount = structure(list(
name = "amount", attributes = c(currency = "USD"),
children = list(text = structure(list(name = "text",
attributes = NULL, children = NULL, namespace = NULL,
namespaceDefinitions = NULL, value = "472404"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), `last-updated` = structure(list(name = "last-updated",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "06/17/2019"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), oneWeekChange = structure(list(name = "oneWeekChange",
attributes = c(deprecated = "true"), children = NULL,
namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), valueChange = structure(list(name = "valueChange",
attributes = c(duration = "30", currency = "USD"
), children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "1494"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), valuationRange = structure(list(name = "valuationRange",
attributes = NULL, children = list(low = structure(list(
name = "low", attributes = c(currency = "USD"),
children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "448784"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode",
"oldClass")), high = structure(list(name = "high",
attributes = c(currency = "USD"), children = list(
text = structure(list(name = "text",
attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "500748"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), percentile = structure(list(name = "percentile",
attributes = NULL, children = list(text = structure(list(
name = "text", attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "0"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
)), localRealEstate = structure(list(name = "localRealEstate",
attributes = NULL, children = list(region = structure(list(
name = "region", attributes = c(name = "Scituate",
id = "397319", type = "city"), children = list(
zindexValue = structure(list(name = "zindexValue",
attributes = NULL, children = list(
text = structure(list(name = "text",
attributes = NULL, children = NULL,
namespace = NULL, namespaceDefinitions = NULL,
value = "551,000"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL,
namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode",
"oldClass")), links = structure(list(
name = "links", attributes = NULL,
children = list(overview = structure(list(
name = "overview", attributes = NULL,
children = list(text = structure(list(
name = "text", attributes = NULL,
children = NULL, namespace = NULL,
namespaceDefinitions = NULL, value = "http://www.zillow.com/local-info/MA-Scituate/r_397319/"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL,
namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode",
"oldClass")), forSaleByOwner = structure(list(
name = "forSaleByOwner", attributes = NULL,
children = list(text = structure(list(
name = "text", attributes = NULL,
children = NULL, namespace = NULL,
namespaceDefinitions = NULL, value = "http://www.zillow.com/scituate-ma/fsbo/"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL,
namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode",
"oldClass")), forSale = structure(list(
name = "forSale", attributes = NULL,
children = list(text = structure(list(
name = "text", attributes = NULL,
children = NULL, namespace = NULL,
namespaceDefinitions = NULL, value = "http://www.zillow.com/scituate-ma/"), class = c("XMLTextNode",
"XMLNode", "RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL,
namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode",
"oldClass"))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"
))), namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass"))),
namespace = NULL, namespaceDefinitions = NULL), class = c("XMLNode",
"RXMLAbstractNode", "XMLAbstractNode", "oldClass")))
I would expect the xmlValue function to output the "zestimate" (Zillow's estimated value for any given house) dollar value displayed as text for the XML element "amount" in the API response.
Thanks. Sorry for any lack of clarity or incorrect terminology!
I am starting from the response,
which I have stored in my_xml. Essentially the same steps should get you to the amount
even if you start from your own XML.
library(tidyverse)
library(XML)
# Parse the response held in `my_xml` into an R-level XML tree, then take the
# tree's root node so it can be searched with XPath.
df_xml <- xmlTreeParse(my_xml)
df_root <- xmlRoot(df_xml)
# getNodeSet() returns every node matching the XPath expression; xmlValue()
# extracts each node's text content. The result is a character vector with one
# entry per <amount> element, which is the text the question asks for
# (unlist() would instead flatten the node's name, attributes and value).
vapply(getNodeSet(df_root, "//amount"), xmlValue, character(1))
With the new dput data, the following works.
# `result` is the list returned by the API call (named `reply` in the
# question). It can be used directly: wrapping it in dput() only prints the
# whole structure to the console and returns the same object.
df <- result
# The list has three elements (request, message, response); `response` holds
# the already-parsed XML node, so it does not need xmlTreeParse() again.
df1 <- df[["response"]]
# getNodeSet() returns the matching <amount> node(s); xmlValue() pulls out each
# node's text. The result is a character vector (e.g. "472404" for the dput
# data above), not a data frame.
vapply(getNodeSet(df1, "//amount"), xmlValue, character(1))