I have a complicated string that includes a kilometer range and a fee for users that fall into that range. Ideally, I would like to transform the string into something that I could use to easily assign fees to users. I've taken the steps below to extract a list from the string, but the kilometer range is still not usable for classifying users:
import string
import unicodedata
import re
str = '{"row1":{"from":"0","to":"500","fee":"23100 "},"row2":{"from":"500","to":"1000","fee":"24100 "},"row3":{"from":"1000","to":"1500","fee":"25200 "},"row4":{"from":"1500","to":"2000","fee":"26200 "},"row5":{"from":"2000","to":"2500","fee":"27200 "},"row6":{"from":"2500","to":"3000","fee":"28300 "},"row7":{"from":"3000","to":"3500","fee":"29300 "},"row8":{"from":"3500","to":"4000","fee":"30400 "},"row9":{"from":"4000","to":"4500","fee":"31400 "},"row10":{"from":"4500","to":"5000","fee":"32400 "},"row11":{"from":"5000","to":"5500","fee":"33500 "},"row12":{"from":"5500","to":"6000","fee":"34600 "},"row13":{"from":"6000","to":"6500","fee":"35500 "},"row14":{"from":"6500","to":"7000","fee":"36600 "},"row15":{"from":"7000","to":"7500","fee":"37700 "},"row16":{"from":"7500","to":"8000","fee":"38600 "},"row17":{"from":"8000","to":"8500","fee":"39700 "},"row18":{"from":"8500","to":"9000","fee":"40300 "},"row19":{"from":"9000","to":"9500","fee":"41400 "},"row20":{"from":"9500","to":"10000","fee":"42700 "},"row21":{"from":"10000","to":"10500","fee":"43500 "},"row22":{"from":"10500","to":"11000","fee":"44500 "},"row23":{"from":"11000","to":"11500","fee":"45600 "},"row24":{"from":"11500","to":"12000","fee":"46600 "},"row25":{"from":"12000","to":"12500","fee":"47700 "},"row26":{"from":"12500","to":"13000","fee":"48700 "},"row27":{"from":"13000","to":"13500","fee":"49700 "},"row28":{"from":"13500","to":"14000","fee":"50800 "},"row29":{"from":"14000","to":"14500","fee":"51900 "},"row30":{"from":"14500","to":"15000","fee":"52800 "},"row31":{"from":"15000","to":"15500","fee":"52800 "},"row32":{"from":"15500","to":"16000","fee":"52800 "},"row33":{"from":"16000","to":"70000","fee":"52800 "'
str1 = unicodedata.normalize("NFKD", str)
str2 = str1.translate({ord(c): None for c in string.whitespace})
s1 = str2.replace('{', "")
s2 = s1.replace('"', ' ')
s3 = s2.split(sep='},')
for i in range(len(s3)):
ftm = re.search(r'from : (\d+) , to : (\d+)', s3[i])
fm = re.search(r'fee : (\d+)', s3[i])
if ftm and fm:
vfrom = int(ftm.group(1))
vto = int(ftm.group(2))
vfee= int(fm.group(1))
print(f"({vfrom}, {vto}), {vfee}")
I would like it to be instead transformed into something that extracts the lower and upper bounds of the range separately next to the fee, where I could later export into Excel and check which range the user falls between.
Your input data is almost valid JSON except that it's missing 2 trailing right-braces. If you fix that in code (albeit that it should be fixed at origin) then you can simply do this:
import json
_str = '{"row1":{"from":"0","to":"500","fee":"23100 "},"row2":{"from":"500","to":"1000","fee":"24100 "},"row3":{"from":"1000","to":"1500","fee":"25200 "},"row4":{"from":"1500","to":"2000","fee":"26200 "},"row5":{"from":"2000","to":"2500","fee":"27200 "},"row6":{"from":"2500","to":"3000","fee":"28300 "},"row7":{"from":"3000","to":"3500","fee":"29300 "},"row8":{"from":"3500","to":"4000","fee":"30400 "},"row9":{"from":"4000","to":"4500","fee":"31400 "},"row10":{"from":"4500","to":"5000","fee":"32400 "},"row11":{"from":"5000","to":"5500","fee":"33500 "},"row12":{"from":"5500","to":"6000","fee":"34600 "},"row13":{"from":"6000","to":"6500","fee":"35500 "},"row14":{"from":"6500","to":"7000","fee":"36600 "},"row15":{"from":"7000","to":"7500","fee":"37700 "},"row16":{"from":"7500","to":"8000","fee":"38600 "},"row17":{"from":"8000","to":"8500","fee":"39700 "},"row18":{"from":"8500","to":"9000","fee":"40300 "},"row19":{"from":"9000","to":"9500","fee":"41400 "},"row20":{"from":"9500","to":"10000","fee":"42700 "},"row21":{"from":"10000","to":"10500","fee":"43500 "},"row22":{"from":"10500","to":"11000","fee":"44500 "},"row23":{"from":"11000","to":"11500","fee":"45600 "},"row24":{"from":"11500","to":"12000","fee":"46600 "},"row25":{"from":"12000","to":"12500","fee":"47700 "},"row26":{"from":"12500","to":"13000","fee":"48700 "},"row27":{"from":"13000","to":"13500","fee":"49700 "},"row28":{"from":"13500","to":"14000","fee":"50800 "},"row29":{"from":"14000","to":"14500","fee":"51900 "},"row30":{"from":"14500","to":"15000","fee":"52800 "},"row31":{"from":"15000","to":"15500","fee":"52800 "},"row32":{"from":"15500","to":"16000","fee":"52800 "},"row33":{"from":"16000","to":"70000","fee":"52800 "'
for v in json.loads(_str + "}}").values():
row = [v[k].replace(" ", "").strip() for k in ("from", "to", "fee")]
print(", ".join(row))
Output (partial):
0, 500, 23100
500, 1000, 24100
1000, 1500, 25200
1500, 2000, 26200
2000, 2500, 27200
2500, 3000, 28300
3000, 3500, 29300
3500, 4000, 30400
4000, 4500, 31400
4500, 5000, 32400
5000, 5500, 33500
5500, 6000, 34600
6000, 6500, 35500
6500, 7000, 36600