I am trying to combine multiple files in Fortran. The files all have one column in common ("einst")
The main file has over 100.000 lines but the other files contain extra info about some of the IDs in the main file.
I want to merge the smaller files into the large file and keep all the IDs from the large file that did not get extra information from the smaller files.
I previously asked a similar question ("Can I join two files by a matching column in fortran?") and had great success with that code, but with it, IDs that are not in both data files do not appear in the new merged data file.
Here is my unsuccessful attempt:
! Attempt to merge the rows of one large file with the rows of several small
! files that share the first column (the ID). All five files are read into
! fixed-size arrays; only the first small file takes part in the merge loop.
! NOTE(review): as written this unit does not compile under implicit none;
! the individual problems are flagged next to the lines concerned.
program sameining2
implicit none
! Integer kind with a decimal range of at least 15 digits, used for the IDs.
integer,parameter :: k15 = selected_int_kind(15)
! Number of records read from each file (hard-coded).
integer, parameter :: noeinst=25 !LARGE FILE
integer, parameter :: nosaed_k=3 !SMALL FILE
integer, parameter :: nosaed_1=3 !SMALL FILE
integer, parameter :: nosaed_2=3 !SMALL FILE
integer, parameter :: nosaed_3=3 !SMALL FILE
!COLUMNS IN LARGE FILE
integer(kind=k15), dimension(1:noeinst) :: einstg
integer, dimension(1:noeinst) :: bu, faeding, forgun
!COLUMNS IN SMALL FILE, firstlastkvigur
integer(kind=k15), dimension(1:nosaed_k) :: einst_k
integer, dimension(1:nosaed_k) :: frjot_k, fyrstasaed_k, &
fjoldisaed_k, sidastasaed_k
!COLUMNS IN SMALL FILE, firstlastmjalt1
integer(kind=k15), dimension(1:nosaed_1) :: einst_1
integer, dimension(1:nosaed_1) :: frjot_1, fyrstasaed_1, &
fjoldisaed_1, sidastasaed_1
!COLUMNS IN SMALL FILE, firstlastmjalt2
integer(kind=k15), dimension(1:nosaed_2) :: einst_2
integer, dimension(1:nosaed_2) :: frjot_2, fyrstasaed_2, &
fjoldisaed_2, sidastasaed_2
!COLUMNS IN SMALL FILE, firstlastmjalt3
integer(kind=k15), dimension(1:nosaed_3) :: einst_3
integer, dimension(1:nosaed_3) :: frjot_3, fyrstasaed_3, &
fjoldisaed_3, sidastasaed_3
! Loop indices: k for the large file, i/j/m/n for the four small files.
integer :: i, j, k, m, n
!------------------------------------
!READING THE LARGE FILE
! NOTE(review): the name opened here is "gripalistis.txt"; confirm it matches
! the file on disk.
open (unit=11, file="gripalistis.txt", status="old")
do k = 1, noeinst
read (11,1011) einstg(k), bu(k), faeding(k), forgun(k)
1011 format (i15,1x,i7,1x,i8,1x,i8)
enddo
close (11, status="keep")
! Each small file is read with format 1017 (declared at the end of the
! program): ID, frjot, fyrstasaed, sidastasaed, fjoldisaed.
!READING THE SMALL FILE
open (unit=20000, file="firstlastkvigur.txt", status="old")
do i = 1, nosaed_k
read (20000,1017) einst_k(i), frjot_k(i), fyrstasaed_k(i), sidastasaed_k(i), &
fjoldisaed_k(i)
enddo
!READING THE SMALL FILE
open (unit=30000, file="firstlastmjalt1.txt", status="old")
do j = 1, nosaed_1
read (30000,1017) einst_1(j), frjot_1(j), fyrstasaed_1(j), sidastasaed_1(j), &
fjoldisaed_1(j)
enddo
!READING THE SMALL FILE
open (unit=40000, file="firstlastmjalt2.txt", status="old")
do m = 1, nosaed_2
read (40000,1017) einst_2(m), frjot_2(m), fyrstasaed_2(m), sidastasaed_2(m), &
fjoldisaed_2(m)
enddo
!READING THE SMALL FILE
open (unit=50000, file="firstlastmjalt3.txt", status="old")
do n = 1, nosaed_3
read (50000,1017) einst_3(n), frjot_3(n), fyrstasaed_3(n), sidastasaed_3(n), &
fjoldisaed_3(n)
enddo
close (20000, status="keep")
close (30000, status="keep")
close (40000, status="keep")
close (50000, status="keep")
!------------------------------------
! NOTE(review): pth is not declared anywhere in this program unit.
! status="new" makes the open fail if alldata.txt already exists.
open (unit=80, file=pth//"alldata.txt", status="new")
!This program does not work and it only starts with the first two files...
! HERE I AM TRYING TO TELL THE PROGRAM TO WRITE ALL INFORMATION NEW AND OLD IF
!THE IDs match, AND ELSE IT SHOULD JUST PRINT THE INFO FROM THE LARGE FILE
!------------------------------------
! NOTE(review): "no" (loop start) and "einst_kf" are not declared; the array
! declared above is einst_k.
! The outer loop runs over the small file and the inner loop over the whole
! large file, and both branches of the if write a record, so every large-file
! row is written once per small-file row.
! NOTE(review): the first write below has no trailing "&", so the following
! line is not a continuation of its output list.
do i = no, nosaed_k
do k = 1, noeinst
if (einst_kf(i) == einstg(k) ) then
write (80,1018) einstg(k), bu(k), faeding(k), forgun(k)
frjot_k(i), fyrstasaed_k(i), sidastasaed_k(i), fjoldisaed_k(i)
else
write (80,1018) einstg(k), bu(k), faeding(k), forgun(k)
endif
! NOTE(review): no comma separates the last descriptor on a continued line
! from the first descriptor on the next one (1x / i3 and 1x / i8).
1018 format (i15,1x,i7,1x,i8,1x,i8,1x &
i3,1x,i8,1x,i8,1x,i2,1x &
i8,1x,i8,1x,i2 )
enddo
enddo
close (80, status="keep")
! Input format shared by the four small files.
1017 format (i15,1x,i3,1x,i8,1x,i8,1x,i2)
endprogram sameining2
But what happens is that the do loop compares the first ID in the first file to all the IDs in the second file and just writes it over and over again, so I get a very large, wrong file.
I am not skilled enough to write do loops that compare the IDs from both files and write the information from both files when there is a match, but write only the information that was already in the large file when there is no match in the small file.
My main goal is actually to try to combine 4 smaller files into the large data file and end up with the same amount of IDs as the original large file but all the extra information in the small files has merged to matching IDs. But the code above only attempts to merge two files. Edit: I have added types for all the 5 files and below there are previews of the files. I have not edited the do loop I tried to make. My final goal is a file with this information:
einstg(k), bu(k), faeding(k), forgun(k),
frjot_k(i), fyrstasaed_k(i), sidastasaed_k(i), fjoldisaed_k(i),
frjot_1(j), fyrstasaed_1(j), sidastasaed_1(j), fjoldisaed_1(j),
frjot_2(m), fyrstasaed_2(m), sidastasaed_2(m), fjoldisaed_2(m),
frjot_3(n), fyrstasaed_3(n), sidastasaed_3(n), fjoldisaed_3(n)
Help with my problem would be very much appreciated!
Edit: Preview of the files:
gripalisti.txt
200716619210513 1661921 20070309 20101012
200716619210514 1661921 20070317 20120919
200716619210515 1661921 20070425 20110208
200716619210521 1661921 20070730 20121211
200716619210522 1661921 20070812 20131125
200716619210525 1661921 20071114 20141121
200716619210526 1661921 20071123 20140205
200716619210530 1661921 20071223 20101129
200716619510154 1661951 20070201 20131115
200716619510156 1661951 20070203 20090709
200716619510157 1661951 20070203 20110715
200716619510158 1661951 20070215 20100611
200716619510159 1661951 20070322 20181116
200716619510160 1661951 20070525 20150807
200716619510161 1661951 20070526 20090417
200816385110263 1638511 20080514 20170629
200816385110267 1638511 20080806 20170127
200816385110268 1638511 20080827 20170725
200816385110276 1638511 20081112 20121120
200816386010531 1638881 20080207 20130314
200816386010532 1637551 20080213 20131107
200816386010533 1638601 20080213 20120224
200816386010543 1638601 20080305 20100901
200816386010544 1638601 20080306 20101015
200816386010546 1638921 20080311 20160113
firstlastkvigur.txt
200716619210513 32 20090309 20091218 6
200816386010531 60 20091013 20091013 1
200816386010546 89 20091215 20100113 3
firstlastmjalt1.txt
200716619210521 32 20111218 20111218 1
200716619210525 162 20101207 20101207 1
200816386010533 60 20100928 20101019 2
firstlastmjalt2.txt
200716619510154 10 20130612 20130724 5
200716619510159 10 20120612 20120715 1
200816386010533 10 20110612 20150722 1
firstlastmjalt3.txt
200716619210513 34 20111218 20111218 7
200716619210526 34 20091215 20100113 2
200716619510158 54 20100928 20101019 2
I think the code you want is this:
module m
  ! Record types shared by the merge program: one type per input file layout.
  implicit none

  ! Integer kind with a decimal range of at least 15 digits (used for the IDs).
  integer, parameter :: k15 = selected_int_kind(15)

  ! One record of the main file: the ID followed by three integer columns.
  type :: EinstData
    integer(kind=k15) :: einst
    integer :: bu, faeding, forgun
  end type EinstData

  ! One record of a small file: the ID followed by four integer columns.
  ! Component order matters to any code that reads or constructs the type
  ! positionally, so it is kept as einst, frjot, fyrstasaed, sidastasaed,
  ! fjoldisaed.
  type :: SaedData
    integer(kind=k15) :: einst
    integer :: frjot, fyrstasaed, sidastasaed, fjoldisaed
  end type SaedData

end module m
program p
  ! Merge the records of several small "saed" files into the main "einst"
  ! file, matching on the first column (the einst ID).
  !
  ! Every row of the main file is written to alldata.txt exactly once. Each
  ! small file owns its own fixed-width group of four columns after the main
  ! columns; the group is left blank when that file has no record for the ID,
  ! so data from different small files never overwrite each other and the
  ! columns stay aligned.
  !
  ! Inputs : gripalisti.txt and the files listed in saed_filenames
  !          (list-directed reads, one record per line).
  ! Output : alldata.txt (must not exist yet, because of status='new').
  ! Errors : any open/read failure prints a message to error_unit and stops
  !          with a non-zero exit code.
  use m
  use, intrinsic :: iso_fortran_env, only: error_unit
  implicit none

  character(14), parameter :: einst_filename = 'gripalisti.txt'
  integer, parameter :: einst_size = 25

  integer, parameter :: n_saed_files = 4
  character(19), parameter :: saed_filenames(n_saed_files) = &
    [ 'firstlastkvigur.txt', &
      'firstlastmjalt1.txt', &
      'firstlastmjalt2.txt', &
      'firstlastmjalt3.txt' ]
  integer, parameter :: saed_sizes(n_saed_files) = [3, 3, 3, 3]

  ! Output formats and the number of characters each of them produces.
  character(*), parameter :: einst_format = '(i15,1x,i7,1x,i8,1x,i8)'
  character(*), parameter :: saed_format = '(1x,i3,1x,i8,1x,i8,1x,i2)'
  integer, parameter :: einst_width = 41
  integer, parameter :: saed_width = 25

  type(EinstData) :: einst(einst_size)
  type(SaedData) :: saed
  ! einst_saed(k, f) holds the record of small file f for main-file row k;
  ! it is only defined where einst_has_saed(k, f) is true.
  type(SaedData) :: einst_saed(einst_size, n_saed_files)
  logical :: einst_has_saed(einst_size, n_saed_files)

  character(einst_width + n_saed_files*saed_width) :: line
  character(256) :: msg
  integer :: u, ios, i, j, k, first

  ! Read the main file.
  open(newunit=u, file=einst_filename, status='old', action='read', &
       iostat=ios, iomsg=msg)
  if (ios /= 0) call fail('cannot open '//einst_filename//': '//trim(msg))
  do i = 1, einst_size
    read(u, *, iostat=ios, iomsg=msg) einst(i)%einst, einst(i)%bu, &
                                      einst(i)%faeding, einst(i)%forgun
    if (ios /= 0) call fail('error reading '//einst_filename//': '//trim(msg))
  end do
  close(u)

  ! Read each small file in turn.
  einst_has_saed = .false.
  do i = 1, n_saed_files
    open(newunit=u, file=saed_filenames(i), status='old', action='read', &
         iostat=ios, iomsg=msg)
    if (ios /= 0) call fail('cannot open '//saed_filenames(i)//': '//trim(msg))
    ! The bound is the size of file i; using the inner index here would read
    ! an undefined variable.
    do j = 1, saed_sizes(i)
      read(u, *, iostat=ios, iomsg=msg) saed%einst, saed%frjot, &
                                        saed%fyrstasaed, saed%sidastasaed, &
                                        saed%fjoldisaed
      if (ios /= 0) call fail('error reading '//saed_filenames(i)//': '//trim(msg))
      ! Attach the record to every main-file row with the same ID.
      do k = 1, einst_size
        if (einst(k)%einst == saed%einst) then
          einst_saed(k, i) = saed
          einst_has_saed(k, i) = .true.
        end if
      end do
    end do
    close(u)
  end do

  ! Write the output file, one line per main-file row.
  open(newunit=u, file='alldata.txt', status='new', action='write', &
       iostat=ios, iomsg=msg)
  if (ios /= 0) call fail('cannot create alldata.txt: '//trim(msg))
  do k = 1, einst_size
    ! Build the line in memory so that each small file lands in its own
    ! column group and trailing blanks can be trimmed.
    line = ''
    write(line(1:einst_width), einst_format) einst(k)%einst, einst(k)%bu, &
                                             einst(k)%faeding, einst(k)%forgun
    do i = 1, n_saed_files
      if (einst_has_saed(k, i)) then
        first = einst_width + (i - 1)*saed_width + 1
        write(line(first:first + saed_width - 1), saed_format) &
          einst_saed(k, i)%frjot, einst_saed(k, i)%fyrstasaed, &
          einst_saed(k, i)%sidastasaed, einst_saed(k, i)%fjoldisaed
      end if
    end do
    write(u, '(a)') trim(line)
  end do
  close(u)

contains

  ! Report a fatal error on error_unit and stop with a non-zero exit code.
  subroutine fail(message)
    character(*), intent(in) :: message
    write(error_unit, '(a)') message
    error stop 1
  end subroutine fail

end program p
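which produced the following alldata.txt (note that the only extra columns appearing in this listing are the ones matched from firstlastmjalt1.txt):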
200716619210513 1661921 20070309 20101012
200716619210514 1661921 20070317 20120919
200716619210515 1661921 20070425 20110208
200716619210521 1661921 20070730 20121211 32 20111218 20111218 1
200716619210522 1661921 20070812 20131125
200716619210525 1661921 20071114 20141121 162 20101207 20101207 1
200716619210526 1661921 20071123 20140205
200716619210530 1661921 20071223 20101129
200716619510154 1661951 20070201 20131115
200716619510156 1661951 20070203 20090709
200716619510157 1661951 20070203 20110715
200716619510158 1661951 20070215 20100611
200716619510159 1661951 20070322 20181116
200716619510160 1661951 20070525 20150807
200716619510161 1661951 20070526 20090417
200816385110263 1638511 20080514 20170629
200816385110267 1638511 20080806 20170127
200816385110268 1638511 20080827 20170725
200816385110276 1638511 20081112 20121120
200816386010531 1638881 20080207 20130314
200816386010532 1637551 20080213 20131107
200816386010533 1638601 20080213 20120224 60 20100928 20101019 2
200816386010543 1638601 20080305 20100901
200816386010544 1638601 20080306 20101015
200816386010546 1638921 20080311 20160113