Search code examples
androidhtmlcleaner

Issue with text parsed with HTMLCleaner - spaces at the beginning of the text


I'm able to get text from a website using HTMLCleaner. The problem is that when I set the text on a TextView, it is displayed with a large blank space before the beginning of the text. screenshot

I have tried setting android:gravity, but it made no difference.

Please help.

Here is my code:

    /**
     * Task that builds an {@code HtmlHelper} for a URL, extracts the text of the
     * matching {@code div} elements, and passes that text on to {@code MyView}.
     */
    private class SiteParser extends AsyncTask<String, Void, String> {

        /**
         * Parses the page whose URL is {@code arg[0]}.
         *
         * @param arg task arguments; only the first element is read, as the page URL
         * @return the text of the last element returned by
         *         {@code getnewsByClass("TextoPrint")}, or {@code null} when nothing
         *         matched or an exception was thrown before any element was read
         */
        protected String doInBackground(String... arg) {
            String output = null;

            try {
                HtmlHelper hh = new HtmlHelper(new URL(arg[0]));
                List<TagNode> news = hh.getnewsByClass("TextoPrint");

                // Each match overwrites the previous one, so the last match wins.
                // The text is taken verbatim: surrounding whitespace is not stripped here.
                for (TagNode divElement : news) {
                    output = divElement.getText().toString();
                }
            } catch (Exception e) {
                // Best effort: report the failure and fall through with whatever
                // output currently holds (null unless an element was already read).
                e.printStackTrace();
            }

            return output;
        }

        /**
         * Starts {@code MyView} with the parsed text stored under the {@code "body"}
         * extra, then finishes {@code act}.
         *
         * @param output the value returned by {@code doInBackground}; may be {@code null}
         */
        protected void onPostExecute(String output) {
            Bundle bundle = new Bundle();
            bundle.putString("body", output);

            Intent mainIntent = new Intent(act, MyView.class);
            mainIntent.putExtras(bundle);
            startActivity(mainIntent);
            act.finish();
        }
    }

/**
 * Holds the root node produced by running HtmlCleaner over a page and offers
 * a lookup of {@code div} elements within it.
 */
public class HtmlHelper {
    TagNode rootNode;

    /**
     * Cleans the given page with a fresh {@code HtmlCleaner} and stores the
     * resulting root node.
     *
     * @param htmlPage URL of the page handed to {@code HtmlCleaner.clean}
     * @throws IOException      declared by this constructor
     * @throws XPatherException declared by this constructor
     */
    public HtmlHelper(URL htmlPage) throws IOException, XPatherException {
        rootNode = new HtmlCleaner().clean(htmlPage);
    }

    /**
     * Collects the {@code div} elements whose {@code id} attribute equals the
     * given value.
     *
     * <p>NOTE(review): despite the method and parameter names, the comparison
     * is made against the {@code id} attribute, not {@code class}.
     *
     * @param Classname value the {@code id} attribute must equal
     * @return the matching elements in the order they were returned by
     *         {@code getElementsByName}; empty (never {@code null}) when none match
     */
    List<TagNode> getnewsByClass(String Classname) {
        List<TagNode> matches = new ArrayList<TagNode>();

        // NOTE(review): the `true` flag is presumably the recursive-search
        // switch - confirm against the HtmlCleaner documentation.
        TagNode[] allDivs = rootNode.getElementsByName("div", true);
        if (allDivs == null) {
            return matches;
        }

        for (TagNode div : allDivs) {
            String idValue = div.getAttributeByName("id");
            if (idValue == null || !idValue.equals(Classname)) {
                continue;
            }
            matches.add(div);
        }

        return matches;
    }
}

Solution

  • Try removing any leading (and trailing) whitespace with trim():

    output = divElement.getText().toString().trim();