在PDF c#中查找文本的高度

Question

我正在开发一个处理 PDF 文件的程序，并使用 iTextSharp 来创建 PDF 文件。由于找不到获取文本高度的方法，我继承了 LocationTextExtractionStrategy 类来获取文本的尺寸，但得到的 Y 和 Y1 的值却是相同的。下面附上代码供您参考。

/// <summary>
/// Text extraction strategy that records every rendered text chunk together with
/// the location produced by an <see cref="ITextChunkLocationStrategy"/>, and exposes
/// the chunks' start/end coordinates through <see cref="GetLocations"/>.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="RenderText"/> does not call the base implementation, so the
/// base class presumably never sees these chunks - confirm whether the inherited
/// text-returning members are expected to keep working.
/// </remarks>
public class LocationTextExtractionStrategyWithPosition : LocationTextExtractionStrategy
{
    // Chunks collected by RenderText; GetLocations sorts this list in place.
    private readonly List<TextChunk> locationalResult = new List<TextChunk>();

    // Creates the location object that is stored with each chunk.
    private readonly ITextChunkLocationStrategy tclStrat;

    /// <summary>
    /// Creates the strategy with a <c>TextChunkLocationStrategyDefaultImp</c> location strategy.
    /// </summary>
    public LocationTextExtractionStrategyWithPosition() : this(new TextChunkLocationStrategyDefaultImp())
    {
    }

    /// <summary>
    /// Creates the strategy with a caller-supplied location strategy.
    /// </summary>
    /// <param name="strat">Used in <see cref="RenderText"/> to build each chunk's location.</param>
    public LocationTextExtractionStrategyWithPosition(ITextChunkLocationStrategy strat)
    {
        tclStrat = strat;
    }

    /// <summary>
    /// Records one rendered piece of text together with a location derived from its baseline.
    /// </summary>
    /// <param name="renderInfo">Render information for the text being drawn.</param>
    public override void RenderText(TextRenderInfo renderInfo)
    {
        LineSegment segment = renderInfo.GetBaseline();

        float rise = renderInfo.GetRise();
        if (rise != 0)
        {
            // Transform the baseline by a matrix built from (0, -rise); presumably this
            // is a translation that undoes the text rise - TODO confirm against iTextSharp.
            Matrix riseOffsetTransform = new Matrix(0, -rise);
            segment = segment.TransformBy(riseOffsetTransform);
        }

        locationalResult.Add(new TextChunk(renderInfo.GetText(), tclStrat.CreateLocation(renderInfo, segment)));
    }

    /// <summary>
    /// Returns one <see cref="TextLocation"/> per recorded chunk, in sorted chunk order.
    /// </summary>
    /// <returns>
    /// A new list holding each chunk's text and the first two components of its start and
    /// end locations. Both points come from the segment handed to the location strategy in
    /// <see cref="RenderText"/>, which is the text baseline, so they describe where the
    /// chunk starts and ends rather than how tall it is.
    /// </returns>
    public IList<TextLocation> GetLocations()
    {
        // Orders the recorded chunks in place using the default comparer for TextChunk.
        locationalResult.Sort();

        return locationalResult.ConvertAll(chunk => new TextLocation
        {
            Text = chunk.Text,
            X = chunk.Location.StartLocation[0],
            Y = chunk.Location.StartLocation[1],
            X1 = chunk.Location.EndLocation[0],
            Y1 = chunk.Location.EndLocation[1]
        });
    }
}

/// <summary>
/// Mutable data holder for a piece of text and two points (start and end) that locate it.
/// </summary>
public class TextLocation
{
    /// <summary>The text this location refers to.</summary>
    public string Text { get; set; }

    /// <summary>First coordinate of the start point.</summary>
    public float X { get; set; }

    /// <summary>Second coordinate of the start point.</summary>
    public float Y { get; set; }

    /// <summary>First coordinate of the end point.</summary>
    public float X1 { get; set; }

    /// <summary>Second coordinate of the end point.</summary>
    public float Y1 { get; set; }
}

这里是我的截图供大家参考

Answer 1

默认情况下，chunk.Location 所使用的 ITextChunkLocation 实现只保存文本块在基线上的起点和终点信息。因此，对于水平文本，起点和终点的 Y 坐标是相同的。

如果您还想获取高度等其他细节，您应该创建另一个 ITextChunkLocation 的实现，让它同时保存原始文本渲染信息中的相应属性。然后为您的位置类实现 ITextChunkLocationStrategy，并在 LocationTextExtractionStrategyWithPosition 的构造函数中使用它。

在PDF c#中查找文本的高度

Finding Height of text in PDF c#

c#

itext