Search code examples
stata

Convert strings to HRF monthly dates


I imported from Excel as variable names, and reshaped into "observations", a series of strings (5s) that represent dates.

These dates (stored in the variable daate) have the form ddnn20Y: 1108, which means 1 Jan 2008; 1912, which means 1 Sep 2012; or 11210, which means 1 Dec 2010. That is they are not Stata internal form (SIF) dates held as strings!

I have tried more things than I can remember (i.e.: formatting directly with format daate %tdddnn20YY or gen date = date(daate, "DM20Y")).

I have read the help datetime and tried to implement the HRF-to-SIF Functions (which in my case I think should be the first step) unsuccessfully.

This other question almost gets me to the solution: first by destringing daate to int, and then by gen date = mofd(daate); format date %tm; but it was yet another unsuccessful approach.

Example data:

* Example generated by -dataex-. To install: ssc install dataex
clear
input str12 isin str5 daate double price
"AEA000201011" "11008" 3.53
"AEA000201011" "11009" 2.44
"AEA000201011" "11010" 2.32
"AEA000201011" "11011" 2.86
"AEA000201011" "11012" 3.28
"AEA000201011" "11013" 4.93
"AEA000201011" "11014"  8.6
"AEA000201011" "11015" 7.75
"AEA000201011" "1108"  5.34
"AEA000201011" "1109"  1.78
"AEA000201011" "1110"  1.61
"AEA000201011" "11108" 2.63
"AEA000201011" "11109"    2
"AEA000201011" "1111"  2.14
"AEA000201011" "11110" 2.43
"AEA000201011" "11111" 2.94
"AEA000201011" "11112" 3.29
"AEA000201011" "11113"    5
"AEA000201011" "11114" 7.97
"AEA000201011" "11115" 7.09
"AEA000201011" "1112"  2.77
"AEA000201011" "1113"  3.01
"AEA000201011" "1114"   6.5
"AEA000201011" "1115"  7.03
"AEA000201011" "11208" 2.55
"AEA000201011" "11209" 1.82
"AEA000201011" "11210"  2.3
"AEA000201011" "11211" 2.85
"AEA000201011" "11212" 2.98
"AEA000201011" "11213"  5.4
"AEA000201011" "11214" 6.66
"AEA000201011" "11215" 6.25
"AEA000201011" "1208"  5.58
"AEA000201011" "1209"  1.48
"AEA000201011" "1210"  1.59
"AEA000201011" "1211"  2.15
"AEA000201011" "1212"  2.91
"AEA000201011" "1213"   3.6
"AEA000201011" "1214"  6.79
"AEA000201011" "1215"  7.18
"AEA000201011" "1308"  5.37
"AEA000201011" "1309"  1.74
"AEA000201011" "1310"  1.66
"AEA000201011" "1311"  2.21
"AEA000201011" "1312"  3.23
"AEA000201011" "1313"  4.01
"AEA000201011" "1314"  7.15
"AEA000201011" "1315"   7.6
"AEA000201011" "1408"  5.41
"AEA000201011" "1409"  1.64
"AEA000201011" "1410"  2.09
"AEA000201011" "1411"  2.52
"AEA000201011" "1412"  3.23
"AEA000201011" "1413"  4.13
"AEA000201011" "1414"  7.36
"AEA000201011" "1415"  6.66
"AEA000201011" "1508"  5.82
"AEA000201011" "1509"  1.56
"AEA000201011" "1510"  1.89
"AEA000201011" "1511"  2.87
"AEA000201011" "1512"  3.31
"AEA000201011" "1513"  4.66
"AEA000201011" "1514"   7.8
"AEA000201011" "1515"  7.36
"AEA000201011" "1608"  5.25
"AEA000201011" "1609"  1.78
"AEA000201011" "1610"  1.63
"AEA000201011" "1611"  2.92
"AEA000201011" "1612"  2.91
"AEA000201011" "1613"  4.95
"AEA000201011" "1614"  8.65
"AEA000201011" "1615"  7.57
"AEA000201011" "1708"   5.2
"AEA000201011" "1709"  1.59
"AEA000201011" "1710"  1.57
"AEA000201011" "1711"  3.13
"AEA000201011" "1712"  3.36
"AEA000201011" "1713"     5
"AEA000201011" "1714"   7.6
"AEA000201011" "1715"  7.55
"AEA000201011" "1808"  5.04
"AEA000201011" "1809"  1.99
"AEA000201011" "1810"  1.65
"AEA000201011" "1811"  3.11
"AEA000201011" "1812"  3.37
"AEA000201011" "1813"  5.33
"AEA000201011" "1814"   8.7
"AEA000201011" "1815"  8.12
"AEA000201011" "1908"   4.1
"AEA000201011" "1909"  1.98
"AEA000201011" "1910"  1.68
"AEA000201011" "1911"  2.98
"AEA000201011" "1912"   3.3
"AEA000201011" "1913"  5.01
"AEA000201011" "1914"  8.11
"AEA000201011" "1915"  7.76
"AEA000701010" "11008" 2.14
"AEA000701010" "11009"  .78
"AEA000701010" "11010"  .67
"AEA000701010" "11011"  .58
end

Solution

  • This solution relies on tidying up the original first to get closer to what Stata prefers, so allowing careful checks. We don't need all your data example (but thanks). As @Pearly Spencer points out, these are really monthly dates presented as daily dates.

    * Example generated by -dataex-. To install: ssc install dataex
    clear
    input str5 daate
    "11008"
    "11009"
    "11010"
    "11011"
    "11012"
    "11013"
    "11014"
    "11015"
    "1108" 
    "1109" 
    "1110" 
    "11108"
    "11109"
    "1111" 
    "11110"
    "11111"
    "11112"
    "11113"
    "11114"
    "11115"
    "1112" 
    "1113" 
    "1114" 
    "1115" 
    "11208"
    "11209"
    "11210"
    "11211"
    "11212"
    "11213"
    "11214"
    "11215"
    "1208" 
    "1209" 
    "1210" 
    "1211" 
    "1212" 
    "1213" 
    "1214" 
    "1215" 
    "1308" 
    "1309" 
    "1310" 
    "1311" 
    "1312" 
    "1313" 
    "1314" 
    "1315" 
    "1408" 
    "1409" 
    "1410" 
    "1411" 
    "1412" 
    "1413" 
    "1414" 
    "1415" 
    "1508" 
    "1509" 
    "1510" 
    "1511" 
    "1512" 
    "1513" 
    "1514" 
    "1515" 
    "1608" 
    "1609" 
    "1610" 
    "1611" 
    "1612" 
    "1613" 
    "1614" 
    "1615" 
    "1708" 
    "1709" 
    "1710" 
    "1711" 
    "1712" 
    "1713" 
    "1714" 
    "1715" 
    "1808" 
    "1809" 
    "1810" 
    "1811" 
    "1812" 
    "1813" 
    "1814" 
    "1815" 
    "1908" 
    "1909" 
    "1910" 
    "1911" 
    "1912" 
    "1913" 
    "1914" 
    "1915" 
    end
    
    gen better = substr(daate, 1, 1) + ///
    " " + substr(daate, 2, length(daate) - 3) + ///
    " 20" + substr(daate, -2, 2) 
    
    gen mdate = mofd(daily(better, "DMY")) 
    
    sort mdate 
    
    format mdate %tm 
    
    gen year = year(daily(better, "DMY")) 
    
    list if year <= 2009 , sepby(year) 
    
         +------------------------------------+
         | daate      better     mdate   year |
         |------------------------------------|
      1. |  1108    1 1 2008    2008m1   2008 |
      2. |  1208    1 2 2008    2008m2   2008 |
      3. |  1308    1 3 2008    2008m3   2008 |
      4. |  1408    1 4 2008    2008m4   2008 |
      5. |  1508    1 5 2008    2008m5   2008 |
      6. |  1608    1 6 2008    2008m6   2008 |
      7. |  1708    1 7 2008    2008m7   2008 |
      8. |  1808    1 8 2008    2008m8   2008 |
      9. |  1908    1 9 2008    2008m9   2008 |
     10. | 11008   1 10 2008   2008m10   2008 |
     11. | 11108   1 11 2008   2008m11   2008 |
     12. | 11208   1 12 2008   2008m12   2008 |
         |------------------------------------|
     13. |  1109    1 1 2009    2009m1   2009 |
     14. |  1209    1 2 2009    2009m2   2009 |
     15. |  1309    1 3 2009    2009m3   2009 |
     16. |  1409    1 4 2009    2009m4   2009 |
     17. |  1509    1 5 2009    2009m5   2009 |
     18. |  1609    1 6 2009    2009m6   2009 |
     19. |  1709    1 7 2009    2009m7   2009 |
     20. |  1809    1 8 2009    2009m8   2009 |
     21. |  1909    1 9 2009    2009m9   2009 |
     22. | 11009   1 10 2009   2009m10   2009 |
     23. | 11109   1 11 2009   2009m11   2009 |
     24. | 11209   1 12 2009   2009m12   2009 |
         +------------------------------------+