I need control over the positions of printed text. I need to process the text one TJ/Tj operator at a time. I have the COSString object of the TJ or Tj operator. How can I get the x, y position of each character in a PDF?
Content stream looks like this
C position is (72, 633.8289)
h position is (88.7903125, 633.8289)
a position is (101.7059375, 633.8289) ....
How can we get these positions using PDFBox classes? I tried using
writeString(String, List<TextPosition>) and processTextPosition(TextPosition).
I am able to get the text lines but not the positions. Please help: is there any example code that gets the position of each character of each TJ operator?
We need to override all position-related classes. Thanks to @Tilman Hausherr and @mkl. Please correct my answer if required. Thanks once again.
import java.awt.geom.GeneralPath;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import org.apache.pdfbox.contentstream.PDContentStream;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.OperatorProcessor;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDFontFactory;
import org.apache.pdfbox.pdmodel.graphics.blend.BlendMode;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDTransparencyGroup;
import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
import org.apache.pdfbox.pdmodel.graphics.state.PDTextState;
import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.util.Vector;
public class PDStreamengine extends PDFStreamEngine {
// Operator name -> processor registry consulted by processOperator(). The map is static, so it
// is shared by every engine instance, while addOperators() binds a processor to one engine.
public static Map<String, OperatorProcessor> operators = new HashMap<String, OperatorProcessor>(80);
// Current text matrix; cleared by initPage() and advanced glyph by glyph in showText().
private Matrix textMatrix;
// Current text line matrix; cleared by initPage().
private Matrix textLineMatrix;
// Graphics state stack used by this class; getGraphicsState() returns its top element.
private Stack<PDGraphicsState> graphicsStack = new Stack<PDGraphicsState>();
// Resources of the stream currently being processed (see pushResources/popResources).
private PDResources resources;
// Page handed to initPage(); null until a page has been initialised.
private PDPage currentPage;
// Initial matrix of the stream currently being processed.
private Matrix initialMatrix;
// Accumulators filled by showText(), one entry per decoded glyph. They start out as empty lists
// (instead of null) so that showText() cannot hit a NullPointerException when a text-showing
// operator runs before showTextStrings() has created fresh lists.
// chars: {x, y before the advance, x, y after the advance}, read from textMatrix (2,0)/(2,1).
public static ArrayList<ArrayList<Double>> chars = new ArrayList<ArrayList<Double>>();
// charmatrixs: the text rendering matrix computed for each glyph.
public static ArrayList<Matrix> charmatrixs = new ArrayList<Matrix>();
// tjchars: the result of font.toUnicode(code) for each glyph.
public static ArrayList<String> tjchars = new ArrayList<String>();
/**
 * Initialises the engine for the given page and, when the page has contents,
 * processes its content stream.
 *
 * @param page the page to process
 * @throws IOException if processing the page's content stream fails
 */
@Override
public void processPage(PDPage page) throws IOException
{
    initPage(page);
    if (!page.hasContents())
    {
        // nothing to interpret
        return;
    }
    processStream(page);
}
/**
 * Resets the per-page state: remembers the page, rebuilds the graphics stack with a single
 * state created from the page's crop box, clears the text matrices and resources, and takes
 * the page matrix as the initial matrix.
 *
 * @param page the page about to be processed
 * @throws IllegalArgumentException if {@code page} is null
 */
private void initPage(PDPage page)
{
    if (null == page)
    {
        throw new IllegalArgumentException("Page cannot be null");
    }
    this.currentPage = page;

    // the stack restarts with exactly one graphics state
    this.graphicsStack.clear();
    PDGraphicsState pageState = new PDGraphicsState(page.getCropBox());
    this.graphicsStack.push(pageState);

    this.textMatrix = null;
    this.textLineMatrix = null;
    this.resources = null;
    this.initialMatrix = page.getMatrix();
}
/**
 * Processes a content stream in its own resource and graphics-stack scope.
 * <p>
 * The current resources, graphics stack and initial matrix are saved first and are put back in
 * a {@code finally} block, so that an exception thrown while the stream is processed cannot
 * leave the engine with the nested stream's state.
 *
 * @param contentStream the content stream to process
 * @throws IOException if parsing or processing the stream's operators fails
 */
public void processStream(PDContentStream contentStream) throws IOException
{
    PDResources parent = pushResources(contentStream);
    Stack<PDGraphicsState> savedStack = saveGraphicsStacks();
    Matrix parentMatrix = initialMatrix;
    try
    {
        // transform the CTM using the stream's matrix
        getGraphicsState().getCurrentTransformationMatrix().concatenate(contentStream.getMatrix());
        // the stream's initial matrix includes the parent CTM, e.g. this allows a scaled form
        initialMatrix = getGraphicsState().getCurrentTransformationMatrix().clone();
        // clip to bounding box
        PDRectangle bbox = contentStream.getBBox();
        clipToRect(bbox);
        processStreamOperators(contentStream);
    }
    finally
    {
        // always undo the scope changes, even when an operator failed
        initialMatrix = parentMatrix;
        restoreGraphicsStacks(savedStack);
        popResources(parent);
    }
}
/**
 * Selects the resources to use for the given stream and returns the ones that were active
 * before, so that the caller can hand them back to {@code popResources}.
 * <p>
 * Lookup order: the stream's own resources, then the resources already active (inherited from
 * the parent stream), then the current page's resources, and finally an empty
 * {@link PDResources}.
 *
 * @param contentStream the stream about to be processed
 * @return the resources that were active before this call (may be null)
 */
private PDResources pushResources(PDContentStream contentStream)
{
    final PDResources previous = resources;
    final PDResources own = contentStream.getResources();
    if (own != null)
    {
        resources = own;
    }
    else if (resources == null)
    {
        // no parent resources to inherit from, so fall back to the page
        resources = currentPage.getResources();
    }
    // otherwise: inherit directly from the parent stream; this is not in the PDF spec, but the
    // file from PDFBOX-1359 does this and works in Acrobat

    if (resources == null)
    {
        // resources are required in PDF
        resources = new PDResources();
    }
    return previous;
}
/**
 * Intersects the current clipping path with the given rectangle, transformed by the current
 * transformation matrix. Does nothing when the rectangle is null.
 *
 * @param rectangle the rectangle to clip to, may be null
 */
private void clipToRect(PDRectangle rectangle)
{
    if (rectangle == null)
    {
        return;
    }
    Matrix currentTransform = getGraphicsState().getCurrentTransformationMatrix();
    GeneralPath transformed = rectangle.transform(currentTransform);
    getGraphicsState().intersectClippingPath(transformed);
}
/**
 * Parses the content stream token by token, collecting operands until an operator is read and
 * then dispatching that operator together with its operands to {@code processOperator}.
 *
 * @param contentStream the stream whose tokens are processed
 * @throws IOException if parsing a token or processing an operator fails
 */
private void processStreamOperators(PDContentStream contentStream) throws IOException
{
    List<COSBase> operands = new ArrayList<COSBase>();
    PDFStreamParser tokens = new PDFStreamParser(contentStream);
    // instantiated once per stream, before the first token is read
    new ProcessClasses();
    for (Object next = tokens.parseNextToken(); next != null; next = tokens.parseNextToken())
    {
        if (next instanceof Operator)
        {
            processOperator((Operator) next, operands);
            // hand every operator its own list instead of reusing the previous one
            operands = new ArrayList<COSBase>();
        }
        else if (next instanceof COSObject)
        {
            // unwrap the object before storing it as an operand
            operands.add(((COSObject) next).getObject());
        }
        else
        {
            operands.add((COSBase) next);
        }
    }
}
/**
 * Makes the given resources the active ones again; counterpart of {@code pushResources}.
 *
 * @param parentResources the resources returned by the matching {@code pushResources} call
 */
private void popResources(PDResources parentResources)
{
    this.resources = parentResources;
}
/**
 * Looks up the processor registered under the operator's name and lets it process the
 * operator. Operators without a registered processor are passed to
 * {@code unsupportedOperator}; an {@link IOException} thrown while processing is passed to
 * {@code operatorException}.
 *
 * @param operator the operator read from the content stream
 * @param operands the operands collected for the operator
 * @throws IOException if the unsupported-operator or exception handling throws
 */
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException
{
    final OperatorProcessor handler = operators.get(operator.getName());
    if (handler == null)
    {
        unsupportedOperator(operator, operands);
        return;
    }

    handler.setContext(this);
    try
    {
        // trace every handled operator and its operands on standard output
        System.out.println(operator);
        System.out.println(operands);
        handler.process(operator, operands);
    }
    catch (IOException failure)
    {
        operatorException(operator, operands, failure);
    }
}
/**
 * Replaces the graphics stack with a new one that holds a clone of the current top state.
 *
 * @return the stack that was active before the call, to be passed to
 *         {@code restoreGraphicsStacks} later
 */
protected final Stack<PDGraphicsState> saveGraphicsStacks()
{
    final Stack<PDGraphicsState> previous = this.graphicsStack;
    this.graphicsStack = new Stack<PDGraphicsState>();
    // the new stack starts with a copy of the state that was on top
    this.graphicsStack.push(previous.peek().clone());
    return previous;
}
/**
 * @return the graphics state on top of this class's graphics stack
 */
@Override
public PDGraphicsState getGraphicsState()
{
    return this.graphicsStack.peek();
}
/**
 * Binds the processor to this engine and registers it in the static {@code operators} map
 * under the name it reports.
 *
 * @param op the operator processor to register
 */
public void addOperators(OperatorProcessor op)
{
    op.setContext(this);
    final String operatorName = op.getName();
    operators.put(operatorName, op);
}
/**
 * Makes the given stack the active graphics stack again; counterpart of
 * {@code saveGraphicsStacks}.
 *
 * @param snapshot the stack returned by the matching {@code saveGraphicsStacks} call
 */
protected final void restoreGraphicsStacks(Stack<PDGraphicsState> snapshot)
{
    this.graphicsStack = snapshot;
}
/**
 * Reports how many graphics states are currently on the stack.
 *
 * @return the number of entries in the graphics stack
 */
public int getGraphicsStackSize()
{
    return this.graphicsStack.size();
}
/**
 * Gives access to the current text line matrix.
 *
 * @return the text line matrix, or null when none is set
 */
public Matrix getTextLineMatrix()
{
    return this.textLineMatrix;
}
/**
 * Replaces the current text line matrix.
 *
 * @param value the new text line matrix
 */
public void setTextLineMatrix(Matrix value)
{
    this.textLineMatrix = value;
}
/**
 * Gives access to the current text matrix.
 *
 * @return the text matrix, or null when none is set
 */
public Matrix getTextMatrix()
{
    return this.textMatrix;
}
/**
 * Replaces the current text matrix.
 *
 * @param value the new text matrix
 */
public void setTextMatrix(Matrix value)
{
    this.textMatrix = value;
}
/**
 * @return the resources of the stream currently being processed, or null when none are set
 */
public PDResources getResources()
{
    return this.resources;
}
/**
 * Saves the current graphics state by pushing a clone of the top state onto the stack.
 */
public void saveGraphicsState()
{
    final PDGraphicsState copy = this.graphicsStack.peek().clone();
    this.graphicsStack.push(copy);
}
/**
 * Discards the current graphics state by removing the top entry of the stack.
 */
public void restoreGraphicsState()
{
    this.graphicsStack.pop();
}
/**
 * Moves the text matrix by the given displacement.
 *
 * @param tx the horizontal displacement
 * @param ty the vertical displacement
 * @throws IOException declared by the signature; not thrown by this implementation itself
 */
protected void applyTextAdjustment(float tx, float ty) throws IOException
{
    // shift the text matrix by a pure translation
    final Matrix shift = Matrix.getTranslateInstance(tx, ty);
    this.textMatrix.concatenate(shift);
}
/**
 * Processes the content stream of a form XObject, provided the form's stream is not empty.
 *
 * @param form the form XObject to show
 * @throws IllegalStateException if no page has been initialised
 * @throws IOException if processing the form's content stream fails
 */
public void showForm(PDFormXObject form) throws IOException
{
    if (currentPage == null)
    {
        throw new IllegalStateException("No current page, call " +
                "#processChildStream(PDContentStream, PDPage) instead");
    }
    final boolean emptyForm = form.getCOSObject().getLength() <= 0;
    if (emptyForm)
    {
        return;
    }
    processStream(form);
}
/**
 * Called when a single string of text is to be shown.
 * <p>
 * The static glyph accumulators ({@code chars}, {@code charmatrixs}, {@code tjchars}) are
 * replaced by fresh lists before the string is decoded. Without this, {@code showText} would
 * either fail with a NullPointerException (no lists created yet) or append this string's glyphs
 * to the lists of the previous {@code showTextStrings} call, which have already been handed to
 * {@code Horizontalparsing}. After the call the accumulators describe this string only; they
 * are not published to {@code Horizontalparsing} here.
 *
 * @param string the encoded text
 * @throws IOException if there was an error showing the text
 */
public void showTextString(byte[] string) throws IOException
{
    // start from empty buffers so this string's glyphs are recorded on their own
    chars = new ArrayList<ArrayList<Double>>();
    charmatrixs = new ArrayList<Matrix>();
    tjchars = new ArrayList<String>();
    showText(string);
}
/**
 * Shows a transparency group by delegating to {@code processTransparencyGroup}.
 *
 * @param form the transparency group to show
 * @throws IOException if processing the group fails
 */
@Override
public void showTransparencyGroup(PDTransparencyGroup form) throws IOException
{
    this.processTransparencyGroup(form);
}
/**
 * Processes the content stream of a transparency group in its own resource and graphics-stack
 * scope.
 * <p>
 * The graphics stack is saved with {@code saveGraphicsStacks()} and restored with the matching
 * {@code restoreGraphicsStacks(...)} of this class. The inherited
 * {@code restoreGraphicsStack(...)} must not be used here: it cannot reach the private
 * {@code graphicsStack} field of this class, so the group's graphics state would stay active
 * after the group has been processed. The restore happens in a {@code finally} block so that
 * a failing operator cannot leave the group's state behind either.
 *
 * @param group the transparency group to process
 * @throws IllegalStateException if no page has been initialised
 * @throws IOException if parsing or processing the group's operators fails
 */
@Override
protected void processTransparencyGroup(PDTransparencyGroup group) throws IOException
{
    if (currentPage == null)
    {
        throw new IllegalStateException("No current page, call " +
                "#processChildStream(PDContentStream, PDPage) instead");
    }
    PDResources parent = pushResources(group);
    Stack<PDGraphicsState> savedStack = saveGraphicsStacks();
    Matrix parentMatrix = initialMatrix;
    try
    {
        // the stream's initial matrix includes the parent CTM, e.g. this allows a scaled form
        initialMatrix = getGraphicsState().getCurrentTransformationMatrix().clone();
        // transform the CTM using the stream's matrix
        getGraphicsState().getCurrentTransformationMatrix().concatenate(group.getMatrix());
        // Before execution of the transparency group XObject's content stream,
        // the current blend mode in the graphics state shall be initialized to Normal,
        // the current stroking and nonstroking alpha constants to 1.0, and the current soft
        // mask to None.
        getGraphicsState().setBlendMode(BlendMode.NORMAL);
        getGraphicsState().setAlphaConstant(1);
        getGraphicsState().setNonStrokeAlphaConstant(1);
        getGraphicsState().setSoftMask(null);
        // clip to bounding box
        clipToRect(group.getBBox());
        processStreamOperators(group);
    }
    finally
    {
        initialMatrix = parentMatrix;
        // restore the stack owned by this class (see the method comment)
        restoreGraphicsStacks(savedStack);
        popResources(parent);
    }
}
/**
 * Handles an array of strings and numbers that is to be shown as text.
 * <p>
 * Fresh glyph accumulators are created first. Each string element is shown with
 * {@code showText}; each number element moves the text matrix via
 * {@code applyTextAdjustment}. When at least one glyph was recorded, the accumulators are stored
 * in the {@code Horizontalparsing} maps under the current {@code tj_ycount}, which is then
 * incremented.
 *
 * @param array the strings and numbers to process
 * @throws IOException if an element is neither a number nor a string, or if showing text fails
 */
@Override
public void showTextStrings(COSArray array) throws IOException
{
    final PDTextState text = getGraphicsState().getTextState();
    final float size = text.getFontSize();
    final float hScale = text.getHorizontalScaling() / 100f;
    final PDFont currentFont = text.getFont();

    // every call records into its own set of lists
    chars = new ArrayList<ArrayList<Double>>();
    charmatrixs = new ArrayList<Matrix>();
    tjchars = new ArrayList<String>();

    final boolean vertical = currentFont != null && currentFont.isVertical();

    for (COSBase element : array)
    {
        if (element instanceof COSString)
        {
            showText(((COSString) element).getBytes());
            continue;
        }
        if (!(element instanceof COSNumber))
        {
            throw new IOException("Unknown type in array for TJ operation:" + element);
        }

        // a number is a displacement in thousandths of a text space unit
        final float tj = ((COSNumber) element).floatValue();
        if (vertical)
        {
            applyTextAdjustment(0, -tj / 1000 * size);
        }
        else
        {
            applyTextAdjustment(-tj / 1000 * size * hScale, 0);
        }
    }

    if (chars.isEmpty() || charmatrixs.isEmpty())
    {
        // no glyph was recorded, nothing to publish
        return;
    }
    Horizontalparsing.poscharobj.put(Horizontalparsing.tj_ycount, chars);
    Horizontalparsing.txtposmatrix.put(Horizontalparsing.tj_ycount, charmatrixs);
    Horizontalparsing.wordobj.put(Horizontalparsing.tj_ycount, tjchars);
    Horizontalparsing.tj_ycount += 1;
}
/**
 * Decodes the encoded string code by code, shows every glyph and records its position.
 * <p>
 * For each code this method appends the glyph's text rendering matrix to {@code charmatrixs},
 * the result of {@code font.toUnicode(code)} to {@code tjchars}, and a four-element list to
 * {@code chars}: the (2,0)/(2,1) values of the text matrix before and after the glyph's
 * displacement has been applied.
 *
 * @param string the encoded text
 * @throws IOException if reading a code or showing a glyph fails
 */
@Override
protected void showText(byte[] string) throws IOException
{
    PDGraphicsState state = getGraphicsState();
    PDTextState text = state.getTextState();

    // fall back to the default font when none is selected
    PDFont font = text.getFont();
    if (font == null)
    {
        font = PDFontFactory.createDefaultFont();
    }

    float size = text.getFontSize();
    float hScale = text.getHorizontalScaling() / 100f;
    float charSpacing = text.getCharacterSpacing();

    // the text state parameters in matrix form
    Matrix parameters = new Matrix(
            size * hScale, 0,
            0, size,
            0, text.getRise());

    InputStream codes = new ByteArrayInputStream(string);
    while (codes.available() > 0)
    {
        // decode one character code and remember how many bytes it took
        int remainingBefore = codes.available();
        int code = font.readCode(codes);
        int codeLength = remainingBefore - codes.available();
        String unicode = font.toUnicode(code);

        // start and end position of this glyph
        ArrayList<Double> glyphPosition = new ArrayList<Double>();

        // Word spacing shall be applied to every occurrence of the single-byte character code
        // 32 in a string when using a simple font or a composite font that defines code 32 as
        // a single-byte code.
        float wordSpacing = 0;
        if (codeLength == 1 && code == 32)
        {
            wordSpacing += text.getWordSpacing();
        }

        // text rendering matrix (text space -> device space)
        Matrix ctm = state.getCurrentTransformationMatrix();
        Matrix renderingMatrix = parameters.multiply(textMatrix).multiply(ctm);

        // changes to vertical text should be tested with PDFBOX-2294 and PDFBOX-1422
        if (font.isVertical())
        {
            // move from the horizontal origin to the vertical origin using the glyph's
            // position vector (text space)
            Vector positionVector = font.getPositionVector(code);
            renderingMatrix.translate(positionVector);
        }

        // horizontal and vertical displacement of the glyph, in text space
        Vector displacement = font.getDisplacement(code);

        // show the glyph inside its own graphics state and put back both text matrices
        // afterwards, in case they were reassigned while the glyph was shown
        saveGraphicsState();
        Matrix savedTextMatrix = textMatrix;
        Matrix savedTextLineMatrix = textLineMatrix;
        showGlyph(renderingMatrix, font, code, unicode, displacement);
        textMatrix = savedTextMatrix;
        textLineMatrix = savedTextLineMatrix;

        // position before the advance
        double startX = textMatrix.getValue(2, 0);
        double startY = textMatrix.getValue(2, 1);
        glyphPosition.add(startX);
        glyphPosition.add(startY);
        charmatrixs.add(renderingMatrix);
        restoreGraphicsState();

        // combined displacement: only one axis advances, depending on the writing mode
        float tx = 0;
        float ty = 0;
        if (font.isVertical())
        {
            ty = displacement.getY() * size + charSpacing + wordSpacing;
        }
        else
        {
            tx = (displacement.getX() * size + charSpacing + wordSpacing) * hScale;
        }

        // advance the text matrix
        textMatrix.concatenate(Matrix.getTranslateInstance(tx, ty));

        // position after the advance
        double endX = textMatrix.getValue(2, 0);
        double endY = textMatrix.getValue(2, 1);
        glyphPosition.add(endX);
        glyphPosition.add(endY);
        tjchars.add(unicode);
        chars.add(glyphPosition);
    }
}
}