I am trying to read data from an XML file into Python. My file looks like this:
1. <Array type="Matrix" nelem="2037">
2. <Matrix nrows="92" ncols="5">
3. 1.0144E+05 296.34 24.34 2.36E-02 9.18E-09
4. 1.0132E+05 296.12 34.62 2.34E-02 9.18E-09
5. 1.0103E+05 295.85 59.50 2.32E-02 9.61E-09
6. 1.0063E+05 295.51 94.08 2.30E-02 1.05E-08
7. 1.0010E+05 295.07 139.65 2.29E-02 1.24E-08
8. 9.9444E+04 294.51 196.23 2.28E-02 1.49E-08
9. 9.8676E+04 293.95 263.01 2.27E-02 1.70E-08
10. 9.7779E+04 293.36 341.50 2.23E-02 1.94E-08
11. 9.6727E+04 292.95 434.30 2.22E-02 2.27E-08
12. 9.5517E+04 292.28 542.09 2.20E-02 2.59E-08
13. 9.4150E+04 291.50 665.24 2.12E-02 2.70E-08
14. 9.2626E+04 290.83 804.34 2.08E-02 2.64E-08
15. 9.0945E+04 290.02 960.00 2.04E-02 2.52E-08
16. 8.9111E+04 289.29 1132.66 2.00E-02 2.51E-08
17. 8.7133E+04 288.48 1322.50 1.96E-02 2.61E-08
18. 8.5016E+04 287.48 1529.73 1.92E-02 2.68E-08
19. 8.2769E+04 286.42 1754.71 1.84E-02 2.77E-08
20. 8.0399E+04 285.76 1997.90 1.74E-02 2.91E-08
21. 7.7920E+04 285.16 2259.60 1.62E-02 3.14E-08
22. 7.5342E+04 284.11 2539.80 1.53E-02 3.40E-08
23. 7.2678E+04 282.85 2838.57 1.37E-02 3.46E-08
24. 6.9937E+04 281.48 3155.99 1.15E-02 3.30E-08
25. 6.7135E+04 279.79 3491.77 9.44E-03 3.06E-08
26. 6.4287E+04 277.74 3845.49 7.14E-03 3.38E-08
27. 6.1403E+04 276.48 4217.67 2.80E-03 5.91E-08
28. 5.8497E+04 274.86 4608.78 1.21E-03 6.84E-08
29. 5.5584E+04 271.76 5017.35 3.52E-03 4.09E-08
30. 5.2686E+04 268.40 5440.57 4.30E-03 2.86E-08
31. 4.9832E+04 265.81 5875.99 2.99E-03 2.45E-08
32. 4.7049E+04 263.28 6320.95 1.35E-03 1.88E-08
33. 4.4363E+04 260.75 6771.68 5.27E-04 1.42E-08
34. 4.1799E+04 257.97 7223.64 6.16E-04 1.30E-08
35. 3.9366E+04 255.02 7673.76 8.03E-04 1.43E-08
36. 3.7061E+04 251.94 8121.31 1.17E-03 1.72E-08
37. 3.4879E+04 248.88 8566.01 1.27E-03 1.74E-08
38. 3.2814E+04 245.60 9007.64 8.27E-04 1.53E-08
39. 3.0860E+04 242.06 9445.71 4.83E-04 1.20E-08
40. 2.9012E+04 238.35 9879.78 3.83E-04 9.68E-09
41. 2.7265E+04 234.55 10309.53 3.30E-04 9.10E-09
42. 2.5614E+04 230.80 10734.89 2.89E-04 8.65E-09
43. 2.4054E+04 227.19 11155.95 2.34E-04 8.05E-09
44. 2.2581E+04 223.68 11572.86 1.75E-04 7.38E-09
45. 2.1191E+04 220.25 11985.73 1.25E-04 7.66E-09
46. 1.9879E+04 217.01 12394.75 8.71E-05 7.49E-09
47. 1.8641E+04 213.99 12800.23 5.93E-05 2.50E-08
48. 1.7473E+04 211.29 13202.67 4.46E-05 5.99E-08
49. 1.6373E+04 208.76 13602.46 2.82E-05 7.64E-08
50. 1.5336E+04 206.22 13999.69 1.41E-05 9.54E-08
51. 1.4360E+04 203.68 14394.33 1.09E-05 1.05E-07
52. 1.3440E+04 201.70 14786.87 8.80E-06 1.34E-07
53. 1.2575E+04 200.15 15178.25 6.54E-06 1.46E-07
54. 1.1760E+04 198.40 15569.14 5.39E-06 1.68E-07
55. 1.0989E+04 196.78 15960.93 4.67E-06 2.74E-07
56. 1.0258E+04 196.25 16356.83 3.72E-06 5.86E-07
57. 9.5617E+03 196.05 16760.42 3.20E-06 1.02E-06
58. 8.8959E+03 194.61 17173.03 3.17E-06 1.25E-06
59. 8.2573E+03 196.84 17599.75 3.24E-06 1.78E-06
60. 7.6429E+03 197.60 18046.00 3.20E-06 2.03E-06
61. 7.0506E+03 197.83 18512.76 3.12E-06 2.02E-06
62. 6.4798E+03 198.96 19002.95 3.11E-06 1.95E-06
63. 5.9315E+03 201.72 19521.31 3.18E-06 1.77E-06
64. 5.4070E+03 205.16 20072.52 3.25E-06 1.36E-06
65. 4.9073E+03 208.03 20658.79 3.26E-06 7.92E-07
66. 4.4331E+03 210.27 21280.86 3.27E-06 3.69E-07
67. 3.9848E+03 211.16 21938.29 3.28E-06 2.51E-07
68. 3.5628E+03 211.81 22631.11 3.30E-06 3.40E-07
69. 3.1674E+03 212.75 23362.02 3.35E-06 9.72E-07
70. 2.7986E+03 214.17 24135.29 3.41E-06 2.31E-06
71. 2.4565E+03 215.78 24955.51 3.49E-06 4.13E-06
72. 2.1410E+03 217.31 25826.71 3.54E-06 5.75E-06
73. 1.8518E+03 218.25 26751.68 3.56E-06 6.85E-06
74. 1.5884E+03 220.18 27735.83 3.57E-06 7.84E-06
75. 1.3504E+03 224.28 28791.83 3.66E-06 8.91E-06
76. 1.1369E+03 228.51 29932.10 3.93E-06 9.87E-06
77. 9.4706E+02 230.12 31158.07 4.26E-06 1.04E-05
78. 7.7988E+02 231.07 32468.76 4.54E-06 1.05E-05
79. 6.3417E+02 233.23 33873.97 4.69E-06 1.03E-05
80. 5.0860E+02 237.07 35392.41 4.76E-06 9.53E-06
81. 4.0176E+02 242.64 37047.76 4.94E-06 8.10E-06
82. 3.1209E+02 250.37 38869.69 5.11E-06 6.37E-06
83. 2.3800E+02 256.95 40881.74 5.43E-06 4.94E-06
84. 1.7781E+02 261.15 43091.95 5.65E-06 3.89E-06
85. 1.2985E+02 263.11 45503.43 5.74E-06 3.16E-06
86. 9.2442E+01 266.27 48135.75 5.84E-06 2.52E-06
87. 6.3957E+01 267.56 51013.24 6.04E-06 2.09E-06
88. 4.2850E+01 264.55 54131.75 6.26E-06 1.81E-06
89. 2.7683E+01 255.02 57453.20 6.40E-06 1.45E-06
90. 1.7161E+01 241.49 60927.40 6.45E-06 1.09E-06
91. 1.0148E+01 226.03 64521.68 6.45E-06 7.12E-07
92. 5.6840E+00 210.65 68225.23 6.13E-06 4.00E-07
93. 2.9904E+00 196.66 72053.01 5.13E-06 2.16E-07
94. 1.0000E+00 183.12 78140.44 4.08E-06 1.51E-07
95. </Matrix>
96. <Matrix nrows="92" ncols="5">
97. 1.0158E+05 294.49 0.17 1.89E-02 2.14E-08
98. 1.0146E+05 294.34 10.39 1.82E-02 2.14E-08
99. 1.0117E+05 294.07 35.13 1.79E-02 3.07E-08
100. 1.0077E+05 293.72 69.49 1.78E-02 3.68E-08
101. 1.0024E+05 293.27 114.79 1.77E-02 3.97E-08
102. 9.9583E+04 292.72 171.05 1.77E-02 4.20E-08
103. 9.8814E+04 292.07 237.44 1.77E-02 4.39E-08
104. 9.7915E+04 291.30 315.44 1.76E-02 4.60E-08
105. 9.6861E+04 290.41 407.55 1.75E-02 4.80E-08
106. 9.5649E+04 289.84 514.48 1.53E-02 4.87E-08
107. 9.4279E+04 290.36 636.95 1.05E-02 4.99E-08
108. 9.2752E+04 290.23 775.71 9.77E-03 5.06E-08
109. 9.1067E+04 289.76 931.25 1.00E-02 5.24E-08
110. 8.9230E+04 289.17 1103.90 1.09E-02 5.59E-08
111. 8.7248E+04 288.44 1293.81 1.22E-02 5.74E-08
112. 8.5127E+04 287.46 1501.17 1.39E-02 5.66E-08
113. 8.2875E+04 286.01 1726.14 1.48E-02 5.49E-08
114. 8.0501E+04 284.37 1968.74 1.29E-02 5.49E-08
115. 7.8017E+04 284.30 2229.59 9.75E-03 5.71E-08
116. 7.5434E+04 283.44 2509.25 7.47E-03 6.05E-08
117. 7.2764E+04 282.38 2807.64 5.96E-03 6.27E-08
118. 7.0018E+04 281.58 3125.10 4.92E-03 6.16E-08
119. 6.7211E+04 280.25 3461.48 5.69E-03 6.17E-08
120. 6.4357E+04 277.96 3815.91 7.07E-03 6.21E-08
121. 6.1468E+04 275.34 4187.77 6.06E-03 6.18E-08
122. 5.8557E+04 273.19 4577.20 3.75E-03 6.04E-08
123. 5.5638E+04 271.04 4984.32 2.22E-03 5.91E-08
124. 5.2735E+04 268.30 5407.26 1.71E-03 5.79E-08
125. 4.9876E+04 264.80 5842.15 1.38E-03 5.71E-08
126. 4.7088E+04 261.36 6285.01 8.12E-04 5.68E-08
127. 4.4398E+04 258.32 6732.39 7.79E-04 5.66E-08
128. 4.1829E+04 255.49 7180.45 1.47E-03 5.16E-08
129. 3.9393E+04 252.93 7626.94 1.10E-03 5.16E-08
130. 3.7084E+04 250.55 8071.79 1.21E-03 5.00E-08
131. 3.4899E+04 247.67 8514.55 1.59E-03 4.51E-08
132. 3.2831E+04 244.13 8954.15 1.25E-03 4.52E-08
133. 3.0875E+04 240.59 9389.94 8.87E-04 4.68E-08
134. 2.9025E+04 237.16 9821.94 6.81E-04 4.55E-08
135. 2.7276E+04 233.85 10250.30 5.10E-04 4.51E-08
136. 2.5623E+04 230.59 10675.13 3.75E-04 4.65E-08
137. 2.4061E+04 227.27 11096.35 2.70E-04 4.51E-08
138. 2.2587E+04 223.78 11513.69 1.89E-04 4.43E-08
139. 2.1195E+04 220.31 11926.96 1.26E-04 4.71E-08
140. 1.9882E+04 216.94 12336.19 8.19E-05 4.96E-08
141. 1.8643E+04 213.32 12741.18 5.94E-05 4.80E-08
142. 1.7475E+04 209.90 13141.86 3.96E-05 5.31E-08
143. 1.6375E+04 207.85 13539.61 2.72E-05 9.51E-08
144. 1.5337E+04 206.91 13936.76 1.06E-05 1.16E-07
145. 1.4360E+04 205.45 14333.88 4.64E-06 1.15E-07
146. 1.3441E+04 203.53 14729.99 3.71E-06 1.37E-07
147. 1.2575E+04 201.98 15125.00 3.41E-06 2.02E-07
148. 1.1760E+04 199.74 15519.06 3.48E-06 2.91E-07
149. 1.0989E+04 197.36 15912.80 3.51E-06 3.46E-07
150. 1.0258E+04 196.71 16309.78 3.19E-06 4.26E-07
151. 9.5618E+03 196.94 16714.78 2.94E-06 5.47E-07
152. 8.8959E+03 196.94 17130.80 2.88E-06 7.00E-07
153. 8.2573E+03 197.86 17561.18 2.83E-06 9.02E-07
154. 7.6429E+03 199.41 18010.64 2.83E-06 1.06E-06
155. 7.0506E+03 200.54 18482.73 2.93E-06 1.21E-06
156. 6.4798E+03 201.99 18980.02 3.09E-06 1.27E-06
157. 5.9315E+03 203.57 19504.69 3.08E-06 1.15E-06
158. 5.4070E+03 205.51 20058.87 2.73E-06 8.33E-07
159. 4.9073E+03 209.41 20647.60 2.68E-06 8.14E-07
160. 4.4331E+03 212.32 21274.78 3.13E-06 1.24E-06
161. 3.9848E+03 214.68 21940.91 3.29E-06 1.64E-06
162. 3.5628E+03 215.32 22645.25 3.31E-06 1.80E-06
163. 3.1674E+03 216.56 23388.76 3.40E-06 2.31E-06
164. 2.7986E+03 217.74 24175.39 3.49E-06 3.20E-06
165. 2.4565E+03 219.17 25008.87 3.56E-06 4.36E-06
166. 2.1410E+03 220.43 25893.16 3.60E-06 5.54E-06
167. 1.8518E+03 221.35 26831.34 3.67E-06 6.81E-06
168. 1.5884E+03 223.48 27829.87 3.71E-06 8.09E-06
169. 1.3504E+03 225.96 28897.71 3.72E-06 9.12E-06
170. 1.1369E+03 228.41 30041.95 3.79E-06 9.85E-06
171. 9.4706E+02 232.50 31274.04 3.84E-06 1.04E-05
172. 7.7988E+02 237.28 32609.15 4.17E-06 1.03E-05
173. 6.3417E+02 239.76 34052.89 4.50E-06 1.00E-05
174. 5.0860E+02 242.46 35609.80 4.67E-06 9.29E-06
175. 4.0176E+02 249.00 37305.72 4.82E-06 7.84E-06
176. 3.1209E+02 253.53 39162.82 4.91E-06 6.44E-06
177. 2.3800E+02 258.29 41192.73 5.06E-06 5.13E-06
178. 1.7781E+02 263.13 43417.12 5.30E-06 4.06E-06
179. 1.2985E+02 267.62 45858.44 5.55E-06 3.30E-06
180. 9.2442E+01 268.18 48522.65 5.81E-06 2.69E-06
181. 6.3957E+01 267.50 51410.10 6.07E-06 2.21E-06
182. 4.2850E+01 262.93 54518.82 6.28E-06 1.91E-06
183. 2.7683E+01 254.42 57826.10 6.40E-06 1.53E-06
184. 1.7161E+01 240.48 61289.07 6.44E-06 1.16E-06
185. 1.0148E+01 224.94 64867.20 6.44E-06 7.50E-07
186. 5.6840E+00 210.51 68560.35 6.07E-06 4.00E-07
187. 2.9904E+00 199.10 72409.85 5.20E-06 2.04E-07
188. 1.0000E+00 186.87 78596.51 4.28E-06 1.46E-07
Each set of data is preceded by a header (<Matrix nrows="92" ncols="5">). Each set contains 92 rows and 5 columns, and there are 5000 such profiles. I want to find the mean of the first rows of all profiles (i.e. lines 3, 97, etc.), then the mean of all the second rows, and so on up to the 92nd row.
I have used the following code to do so:
import numpy as np
# Asker's attempt: fetch individual lines of 'matrix.xml' by line number and
# collect the five whitespace-separated columns into lists before averaging.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# statements below (loop bodies, where the means are computed) cannot be
# determined from the text alone.
# Profile indices: only 9 values (0..8) are generated here.
lat=range(0,9)
# Line offsets inside one profile: range(3,94) yields 3..93, i.e. 91 values,
# while the sample listing shows data rows on lines 3..94.
add=range(3,94)
# One accumulator list per column of a data row.
priori_p=[]
priori_t=[]
priori_z=[]
priori_H2O_vmr=[]
priori_O3_vmr=[]
# NOTE(review): neither file handle is used in the code below; opening
# 'priori.xml' with mode 'w' truncates it although nothing is written to it.
with open('matrix.xml', 'r')as input_file,open('priori.xml', 'w') as output_file:
for a in add:
for b in lat :
# Line number to fetch: a stride of 94 lines per profile plus the offset a.
l=(94*b)+a
# NOTE(review): linecache is not imported in the visible snippet (only numpy is).
lines = linecache.getline('matrix.xml',l)
# Split the fetched line on whitespace; the resulting fields are strings.
lines=lines.split()
priori_p.append(lines[0])
priori_t.append(lines[1])
priori_z.append(lines[2])
priori_H2O_vmr.append(lines[3])
priori_O3_vmr.append(lines[4])
# Python 2 print statement.
print lines
# NOTE(review): the lists hold the split string fields, not floats; presumably
# they need converting before np.mean can average them -- confirm.
mean_p=np.mean(priori_p)
mean_t=np.mean(priori_t)
mean_z=np.mean(priori_z)
mean_H2O_vmr=np.mean(priori_H2O_vmr)
mean_O3_vmr=np.mean(priori_O3_vmr)
I have tried many methods for reading the file by referring to previous questions asked here, but none of them helped. I have tried linecache, readlines, etc.; the code above shows the last method I tried before posting this question. In every case, when I printed 'lines' the output was blank. Is there a problem with my logic? Please help. A link to my input file is given below.
Here is one way to parse your data. It looks for the start and end tags and uses a generator to yield
each numpy array in turn. So, to consume the data you will need to use a loop or other similar construct.
Code:
def from_my_data(file_stream, dtype=None):
    """Yield one numpy array per ``<Matrix ...> ... </Matrix>`` section.

    The input is scanned line by line with a three-state machine:
    ``None`` (outside ``<Array``), ``'Array'`` (inside the array, between
    matrices) and ``'Matrix'`` (inside a matrix, collecting rows).  Tags are
    recognised by the first whitespace-separated token of a line, so each
    tag must start its own line.

    Args:
        file_stream: Any iterable of text lines, e.g. an open file or a
            ``StringIO`` object.
        dtype: Optional numpy dtype handed to ``np.array``.  With the
            default ``None`` the arrays hold the split text fields as
            strings; pass ``float`` to get numeric arrays that can be
            averaged directly.

    Yields:
        One array per matrix, built from the whitespace-split rows found
        between the opening and closing ``Matrix`` tags.  Rows of a matrix
        whose closing tag never appears are not yielded.
    """
    state = None
    data = []
    for raw_line in file_stream:
        fields = raw_line.split()
        if not fields:
            # skip empty lines
            continue
        tag = fields[0]
        if state == 'Matrix':
            if tag == '</Matrix>':
                # Matrix complete: hand it out and start a fresh row buffer.
                state = 'Array'
                yield np.array(data, dtype=dtype)
                data = []
            else:
                data.append(fields)
        elif state == 'Array':
            if tag == '<Matrix':
                state = 'Matrix'
            elif tag == '</Array>':
                state = None
        elif tag == '<Array':
            # state is None here: wait for the opening array tag.
            state = 'Array'
To Use:
The function above expects a file stream. (Note: I did not test this part.)
import numpy as np

with open('matrix.xml', 'r') as input_file:
    # from_my_data yields the split text fields, so convert every profile to
    # float before doing arithmetic on it.
    profiles = [array.astype(float) for array in from_my_data(input_file)]

# Averaging over axis 0 (the profile axis) gives, for each row position and
# column, the mean across all profiles.
# Assumes every matrix has the same shape (92 x 5 according to the question).
mean_profile = np.mean(profiles, axis=0)
print(mean_profile)
Test Data:
from io import StringIO
# Abbreviated in-memory stand-in for 'matrix.xml': one <Array> holding three
# five-row matrices.  The text is written into an empty buffer, which is then
# rewound so that reading starts at the first line.
data_file = StringIO()
data_file.write(u"""
<Array type="Matrix" nelem="2037">
<Matrix nrows="92" ncols="5">
1.0144E+05 296.34 24.34 2.36E-02 9.18E-09
1.0132E+05 296.12 34.62 2.34E-02 9.18E-09
5.6840E+00 210.65 68225.23 6.13E-06 4.00E-07
2.9904E+00 196.66 72053.01 5.13E-06 2.16E-07
1.0000E+00 183.12 78140.44 4.08E-06 1.51E-07
</Matrix>
<Matrix nrows="92" ncols="5">
1.0158E+05 294.49 0.17 1.89E-02 2.14E-08
1.0146E+05 294.34 10.39 1.82E-02 2.14E-08
1.0117E+05 294.07 35.13 1.79E-02 3.07E-08
1.0077E+05 293.72 69.49 1.78E-02 3.68E-08
5.5638E+04 271.04 4984.32 2.22E-03 5.91E-08
</Matrix>
<Matrix nrows="92" ncols="5">
1.0077E+05 293.72 69.49 1.78E-02 3.68E-08
6.4357E+04 277.96 3815.91 7.07E-03 6.21E-08
6.1468E+04 275.34 4187.77 6.06E-03 6.18E-08
5.8557E+04 273.19 4577.20 3.75E-03 6.04E-08
5.5638E+04 271.04 4984.32 2.22E-03 5.91E-08
</Matrix>
</Array>
""")
data_file.seek(0)
Test Code:
import numpy as np
# Walk through every matrix parsed from the sample data and print it,
# preceded by a separator line.
for matrix in from_my_data(data_file):
    print('---')
    print(matrix)
Results:
---
[[u'1.0144E+05' u'296.34' u'24.34' u'2.36E-02' u'9.18E-09']
[u'1.0132E+05' u'296.12' u'34.62' u'2.34E-02' u'9.18E-09']
[u'5.6840E+00' u'210.65' u'68225.23' u'6.13E-06' u'4.00E-07']
[u'2.9904E+00' u'196.66' u'72053.01' u'5.13E-06' u'2.16E-07']
[u'1.0000E+00' u'183.12' u'78140.44' u'4.08E-06' u'1.51E-07']]
---
[[u'1.0158E+05' u'294.49' u'0.17' u'1.89E-02' u'2.14E-08']
[u'1.0146E+05' u'294.34' u'10.39' u'1.82E-02' u'2.14E-08']
[u'1.0117E+05' u'294.07' u'35.13' u'1.79E-02' u'3.07E-08']
[u'1.0077E+05' u'293.72' u'69.49' u'1.78E-02' u'3.68E-08']
[u'5.5638E+04' u'271.04' u'4984.32' u'2.22E-03' u'5.91E-08']]
---
[[u'1.0077E+05' u'293.72' u'69.49' u'1.78E-02' u'3.68E-08']
[u'6.4357E+04' u'277.96' u'3815.91' u'7.07E-03' u'6.21E-08']
[u'6.1468E+04' u'275.34' u'4187.77' u'6.06E-03' u'6.18E-08']
[u'5.8557E+04' u'273.19' u'4577.20' u'3.75E-03' u'6.04E-08']
[u'5.5638E+04' u'271.04' u'4984.32' u'2.22E-03' u'5.91E-08']]