I want to write a program that makes a binary search in a file.
I have a TreeSet ADT that has strings int and I want to search in the file for each string this way: First I want to read the middle page of the file and search it. If the string is found, then the position of the string is returned; if not, then I want to read the page from the left or right half of the file depending on the alphabetical order of the strings.
My code for the class of the binary search is:
public class PageBinarySearch {
final int NOT_FOUND = -1;
final int BOUND_LIMIT = -1;
final int EVRTHG_OK = 0;
final int ON = 1;
final int OFF = 0;
private DataInputStream inFile;
private String[] readBuffer; //This buffer is used to read a specified page from
//the disk.
private String[] auxBuffer; //Auxiliary buffer is used for searching in the la-
//st page. This buffer has less than PAGE_SIZE ele-
//ments.
private int lastPage, leftElemNum, lastPageNo;
private int diskAccessMeter; //This variable is used to count disk accesses.
private int firstNum;
/********************************************************************************/
//Constructor of the class
public PageBinarySearch(String filename, String key, int PageSize, int numPage, int numOfWords) throws IOException{
System.out.println();
System.out.println("Binary Search on disk pages");
System.out.println("*************************************************");
System.out.println();
//Initializing the elements of the class.
readBuffer = new String[PageSize];
lastPage = numPage*PageSize;
leftElemNum = numOfWords-(lastPage);
auxBuffer = new String[leftElemNum];
lastPageNo = numPage;
diskAccessMeter = 0;
this.setFirstNum(filename);
basicPageBinarySearchMethod(filename, 0, lastPageNo, key,PageSize,numPage,numOfWords);
System.out.println("Disk accesses:"+this.getDiskAccessMeter());
}
/********************************************************************************/
//Recursive binary search on disk pages.
public void basicPageBinarySearchMethod(String filename, int start, int end,
String key, int PageSize, int numPage, int numOfWords) throws IOException{
inFile = new DataInputStream(new FileInputStream(filename));
int bound = boundHandler(start, key);
if (bound != EVRTHG_OK){return;}
int midPage = start + (end-start)/2;
int midPageIndex = ((midPage) * (PageSize));
int midPageEnd = midPageIndex + PageSize;
//System.out.println("----------------------------------------");
System.out.println("Page:"+(midPage+1));
System.out.println(" Index:"+midPageIndex+" End:"+midPageEnd);
//Accessing midPage's index.
accessPageIndex(midPageIndex - 1);
fillBasicBuffer(PageSize);
System.out.println();
if (key.compareTo(readBuffer[0])<0){
//Case that the key is in the left part.
end = midPage - 1;
inFile.close(); //We close the stream because in the next recursion
//we want to access new midPage.
basicPageBinarySearchMethod(filename, start, end, key, PageSize, numPage, numOfWords);
}
else if (key.compareTo(readBuffer[255])>0){
//Case that the key is in the left part.
start = midPage+1;
inFile.close();
basicPageBinarySearchMethod(filename, start, end, key, PageSize, numPage, numOfWords);
}
else{
//Case that:
//a) key is bigger than the integer, which is in the midPage-
//Index position of the file, and
//b) key is less than the integer, which is in the midPageEnd
//position of the file.
LookingOnPage(midPage, key);
}
}
/********************************************************************************/
public int boundHandler(int start, String key) throws IOException{
if (start == this.getLastPageNo()){
//In this case the program has to start searching in the last page,
//which has less than PAGE_SIZE elements. So we call the alternative
//function "LookingOnLastPage".
System.out.println("Start == End");
accessPageIndex(this.getLastPage());
LookingOnLastPage(key);
return BOUND_LIMIT;
}
//if (key < this.getFirstNum()){
//System.out.println("Key does not exist.");
//return BOUND_LIMIT;
//}
return EVRTHG_OK;
}
/********************************************************************************/
//This function is running a binary search in the specified disk page, which
//is saved in the readBuffer.
public void LookingOnPage(int pageNo, String key) throws IOException{
int i, result;
System.out.println("Looking for key:"+key+" on page:"+(pageNo+1));
result = myBinarySearch(key, readBuffer);
if (result != -1){
System.out.println("Key found on page:"+(pageNo+1));
inFile.close();
return;
}
else{
System.out.println("Key is not found");
inFile.close();
return;
}
}
/********************************************************************************/
//This function is running a binary search in the last disk page, which
//is saved in the auxBuffer.
public void LookingOnLastPage(String key) throws IOException{
int i, result;
this.setDiskAccessMeter(this.getDiskAccessMeter()+1);
System.out.println("Looking for key:"+key+" on last page:"
+(this.getLastPageNo()+1));
for (i=0; i<this.getLeftElemNum(); i++){
auxBuffer[i] = inFile.readUTF();
}
result = myBinarySearch(key, auxBuffer);
if (result != -1){
System.out.println("Key found on last page");
inFile.close();
return;
}
else{
System.out.println("Key is not found");
inFile.close();
return;
}
}
/********************************************************************************/
public void accessPageIndex(int intNum) throws IOException
{
//This function is skipping intNum integers in the file, to access page's
//index.
int i;
System.out.println(" Accessing page's index...");
inFile.skipBytes(intNum*4);
//inFile.readInt();
}
/********************************************************************************/
public void fillBasicBuffer(int PageSize) throws IOException{
//Loading readBuffer.
int i;
this.setDiskAccessMeter(this.getDiskAccessMeter()+1);
for (i=0; i<PageSize; i++){
readBuffer[i] = inFile.readUTF();
}
}
/********************************************************************************/
//This function implements binary search on a given buffer.
public int myBinarySearch(String key, String[] auxBuffer) {
int lo = 0;
int hi = auxBuffer.length - 1;
while (lo <= hi) {
// Key is in a[lo..hi] or not present.
int mid = lo + (hi - lo) / 2;
if (key.compareTo(auxBuffer[mid])<0) hi = mid - 1;
else if (key.compareTo(auxBuffer[mid])>0) lo = mid + 1;
else return mid;
}
return NOT_FOUND;
}
/********************************************************************************/
public int getLastPage() {
return lastPage;
}
public void setLastPage(int lastPage) {
this.lastPage = lastPage;
}
public int getLeftElemNum() {
return leftElemNum;
}
public void setLeftElemNum(int leftElemNum) {
this.leftElemNum = leftElemNum;
}
public int getLastPageNo() {
return lastPageNo;
}
public void setLastPageNo(int lastPageNo) {
this.lastPageNo = lastPageNo;
}
public int getDiskAccessMeter() {
return diskAccessMeter;
}
public void setDiskAccessMeter(int diskAccessMeter) {
this.diskAccessMeter = diskAccessMeter;
}
public int getFirstNum() {
return firstNum;
}
public void setFirstNum(String filename) throws IOException {
inFile = new DataInputStream(new FileInputStream(filename));
this.firstNum = inFile.readInt();
inFile.close();
}
}
and my main is :
public class MyMain {
private static final int DataPageSize = 128; // Default Data Page size
public static void main(String[] args) throws IOException {
TreeSet<DictPage> listOfWords = new TreeSet<DictPage>(new MyDictPageComp());
LinkedList<Page> Eurethrio = new LinkedList<Page>();
File file = new File("C:\\Kennedy.txt");
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
//This will reference one line at a time...
String line = null;
int line_count=0; //Metavliti gia na metrame grammes ..
int byte_count; //Metavliti gia na metrame bytes...
int total_byte_count=0; //Metavliti gia synoliko arithmo bytes ...
int fromIndex;
int middleP;
int kat = 0;
while( (line = br.readLine())!= null ){
line_count++;
fromIndex=0;
String [] tokens = line.split(",\\s+|\\s*\\\"\\s*|\\s+|\\.\\s*|\\s*\\:\\s*");
String line_rest=line;
for (int i=1; i <= tokens.length; i++) {
byte_count = line_rest.indexOf(tokens[i-1]);
fromIndex = fromIndex + byte_count + 1 + tokens[i-1].length();
if (fromIndex < line.length())
line_rest = line.substring(fromIndex);
listOfWords.add(new DictPage(tokens[i-1],kat));
kat++;
Eurethrio.add(new Page("Kennedy",fromIndex));
}
total_byte_count += fromIndex;
Eurethrio.add(new Page("Kennedy", total_byte_count));
}
//for(DictPage p : listOfWords){
//System.out.println(p.getWord() + " " + p.getPage());
//}
//for (int i = 0;i<Eurethrio.size();i++){
//System.out.println(""+Eurethrio.get(i).getFile()+" "+Eurethrio.get(i).getBytes());
//}
ByteArrayOutputStream bos = new ByteArrayOutputStream(128) ;
DataOutputStream out = new DataOutputStream(bos);
String s ;
int integ;
byte[] buf ;
int nPag=1; //Aritmos selidwn
int numOfWords=0;
for (DictPage p : listOfWords){
s = p.getWord();
integ = p.getPage();
numOfWords++;
byte dst[] = new byte[20];
byte[] src = s.getBytes(); //metatrepei se bytes to string
System.arraycopy(src, 0, dst, 1, src.length); //to antigrafei ston buffer dst
out.write(dst, 0, 20); // to grafei sto arxeio
out.writeInt(integ); // grafei ton akeraio sto file
out.close();
buf = bos.toByteArray(); // Creates a newly allocated byte array.
System.out.println("\nbuf size: " + buf.length + " bytes");
//dhmiourgia selidas (h opoia einai enas byte array)
if(buf.length> nPag*DataPageSize){
nPag++;
}
byte[] DataPage = new byte[nPag*DataPageSize];
System.arraycopy( buf, 0, DataPage, 0, buf.length); // antigrafw buf sth selida
System.out.println("TARRARA"+DataPage.length);
bos.close();//kleinw buf
// write to the file
RandomAccessFile MyFile = new RandomAccessFile ("newbabis", "rw");
MyFile.seek(0);
MyFile.write(DataPage);
}
System.out.println("Number of Pages :"+nPag);
if (nPag%2 == 0){
middleP = nPag/2;
}
else{
middleP = (nPag+1)/2;
}
System.out.println("Middle page is no:"+middleP);
PageBinarySearch BinarySearch;
String key;
for(DictPage p : listOfWords){
key = p.getWord();
BinarySearch = new PageBinarySearch("C:\\Kennedy.txt", key , DataPageSize, nPag, numOfWords);
}
}
}
My program is not working well, and I can't find what I am doing wrong.
use a buffered inputstream instead of your data inputstream and adapt it's usage in your source code
since the readUTF method of the data inputstream will reach end of file instantly if no data was written into a file with the data outputstream method writeUTF or similiar.
note: this will not fix your program. it will lead to a stack overflow because your recursive binary search method does'nt return
private BufferedInputStream inFile;
public void basicPageBinarySearchMethod(String filename, int start, int end,
String key, int PageSize, int numPage, int numOfWords) throws IOException{
inFile = new BufferedInputStream(new FileInputStream(filename));
...
}
public void accessPageIndex(int intNum) throws IOException
{
//This function is skipping intNum integers in the file, to access page's
//index.
int i;
System.out.println(" Accessing page's index...");
inFile.skip(intNum*4);
//inFile.readInt();
}
public void LookingOnLastPage(String key) throws IOException{
int i, result;
this.setDiskAccessMeter(this.getDiskAccessMeter()+1);
System.out.println("Looking for key:"+key+" on last page:"
+(this.getLastPageNo()+1));
for (i=0; i<this.getLeftElemNum(); i++){
auxBuffer[i] = String.valueOf((char)inFile.read());
}
...
}
public void fillBasicBuffer(int PageSize) throws IOException{
//Loading readBuffer.
int i;
this.setDiskAccessMeter(this.getDiskAccessMeter()+1);
for (i=0; i < PageSize; i++){
char c = (char)inFile.read();
readBuffer[i] = String.valueOf(c);
}
}
public void setFirstNum(String filename) throws IOException {
inFile = new BufferedInputStream(new FileInputStream(filename));
this.firstNum = inFile.read();
inFile.close();
}