在 C# 中查找 PDF 文本的高度
Finding Height of text in PDF c#
我正在开发处理 PDF 文件的程序,并使用 iTextSharp 来创建 PDF 文件。由于无法获取文本的高度,我继承(扩展)了 LocationTextExtractionStrategy 类来获取文本的大小,但得到的 Y 和 Y1 的值是相同的。我附上代码供您参考。
/// <summary>
/// Text extraction strategy that collects every rendered text chunk together with the
/// location created for it, and exposes the start/end coordinates of those chunks
/// through <see cref="GetLocations"/>.
/// </summary>
/// <remarks>
/// Each chunk's location is created by the configured location strategy from the
/// (rise-adjusted) baseline segment of the text. This class does not record text height.
/// NOTE(review): <see cref="RenderText"/> does not call the base implementation, so the
/// base class presumably never sees the rendered text - confirm this is intended.
/// </remarks>
public class LocationTextExtractionStrategyWithPosition : LocationTextExtractionStrategy
{
    // Chunks in the order they were rendered; never reordered after being added.
    private readonly List<TextChunk> locationalResult = new List<TextChunk>();

    // Strategy used to build the location object stored with each chunk.
    private readonly ITextChunkLocationStrategy tclStrat;

    /// <summary>
    /// Creates the strategy using <c>TextChunkLocationStrategyDefaultImp</c> to build chunk locations.
    /// </summary>
    public LocationTextExtractionStrategyWithPosition() : this(new TextChunkLocationStrategyDefaultImp())
    {
    }

    /// <summary>
    /// Creates the strategy with a caller-supplied chunk location strategy.
    /// </summary>
    /// <param name="strat">Strategy that creates the location for each rendered chunk; must not be null.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="strat"/> is null.</exception>
    public LocationTextExtractionStrategyWithPosition(ITextChunkLocationStrategy strat)
    {
        // Fail fast here instead of with a NullReferenceException on the first RenderText call.
        if (strat == null)
        {
            throw new System.ArgumentNullException("strat");
        }

        tclStrat = strat;
    }

    /// <summary>
    /// Records the rendered text together with a location created from its baseline.
    /// </summary>
    /// <param name="renderInfo">Render information of the text being drawn.</param>
    public override void RenderText(TextRenderInfo renderInfo)
    {
        LineSegment segment = renderInfo.GetBaseline();
        if (renderInfo.GetRise() != 0)
        {
            // Translate the baseline by the negated rise before creating the location.
            Matrix riseOffsetTransform = new Matrix(0, -renderInfo.GetRise());
            segment = segment.TransformBy(riseOffsetTransform);
        }

        TextChunk tc = new TextChunk(renderInfo.GetText(), tclStrat.CreateLocation(renderInfo, segment));
        locationalResult.Add(tc);
    }

    /// <summary>
    /// Returns the text and start/end coordinates of every collected chunk, in sorted chunk order.
    /// </summary>
    /// <returns>
    /// A new list with one <see cref="TextLocation"/> per collected chunk; empty when nothing
    /// has been rendered yet.
    /// </returns>
    public IList<TextLocation> GetLocations()
    {
        // Sort a copy so that calling this method never reorders the collected chunks.
        var sortedChunks = new List<TextChunk>(locationalResult);
        sortedChunks.Sort();

        var textLocations = new List<TextLocation>(sortedChunks.Count);
        foreach (var chunk in sortedChunks)
        {
            textLocations.Add(new TextLocation
            {
                Text = chunk.Text,
                X = chunk.Location.StartLocation[0],
                Y = chunk.Location.StartLocation[1],
                X1 = chunk.Location.EndLocation[0],
                Y1 = chunk.Location.EndLocation[1]
            });
        }

        return textLocations;
    }
}
/// <summary>
/// Simple data holder pairing a piece of text with a start point (X, Y)
/// and an end point (X1, Y1).
/// </summary>
public class TextLocation
{
    /// <summary>The text this location belongs to.</summary>
    public string Text { get; set; }

    /// <summary>First coordinate of the start point.</summary>
    public float X { get; set; }

    /// <summary>Second coordinate of the start point.</summary>
    public float Y { get; set; }

    /// <summary>First coordinate of the end point.</summary>
    public float X1 { get; set; }

    /// <summary>Second coordinate of the end point.</summary>
    public float Y1 { get; set; }
}
这里是我的截图供大家参考
默认情况下,chunk.Location 所使用的 ITextChunkLocation 实现只包含文本块在基线上的起点和终点信息。因此,对于水平文本,起点和终点的 Y 坐标是相同的。
如果您还想获取高度等其他细节,您应该创建另一个 ITextChunkLocation 实现,让它同时保存原始文本渲染信息中的相应属性。然后为您的位置类实现一个 ITextChunkLocationStrategy,并在 LocationTextExtractionStrategyWithPosition 的构造函数中使用它。
我正在开发处理 PDF 文件的程序,并使用 iTextSharp 来创建 PDF 文件。由于无法获取文本的高度,我继承(扩展)了 LocationTextExtractionStrategy 类来获取文本的大小,但得到的 Y 和 Y1 的值是相同的。我附上代码供您参考。
/// <summary>
/// Text extraction strategy that collects every rendered text chunk together with the
/// location created for it, and exposes the start/end coordinates of those chunks
/// through <see cref="GetLocations"/>.
/// </summary>
/// <remarks>
/// Each chunk's location is created by the configured location strategy from the
/// (rise-adjusted) baseline segment of the text. This class does not record text height.
/// NOTE(review): <see cref="RenderText"/> does not call the base implementation, so the
/// base class presumably never sees the rendered text - confirm this is intended.
/// </remarks>
public class LocationTextExtractionStrategyWithPosition : LocationTextExtractionStrategy
{
    // Chunks in the order they were rendered; never reordered after being added.
    private readonly List<TextChunk> locationalResult = new List<TextChunk>();

    // Strategy used to build the location object stored with each chunk.
    private readonly ITextChunkLocationStrategy tclStrat;

    /// <summary>
    /// Creates the strategy using <c>TextChunkLocationStrategyDefaultImp</c> to build chunk locations.
    /// </summary>
    public LocationTextExtractionStrategyWithPosition() : this(new TextChunkLocationStrategyDefaultImp())
    {
    }

    /// <summary>
    /// Creates the strategy with a caller-supplied chunk location strategy.
    /// </summary>
    /// <param name="strat">Strategy that creates the location for each rendered chunk; must not be null.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="strat"/> is null.</exception>
    public LocationTextExtractionStrategyWithPosition(ITextChunkLocationStrategy strat)
    {
        // Fail fast here instead of with a NullReferenceException on the first RenderText call.
        if (strat == null)
        {
            throw new System.ArgumentNullException("strat");
        }

        tclStrat = strat;
    }

    /// <summary>
    /// Records the rendered text together with a location created from its baseline.
    /// </summary>
    /// <param name="renderInfo">Render information of the text being drawn.</param>
    public override void RenderText(TextRenderInfo renderInfo)
    {
        LineSegment segment = renderInfo.GetBaseline();
        if (renderInfo.GetRise() != 0)
        {
            // Translate the baseline by the negated rise before creating the location.
            Matrix riseOffsetTransform = new Matrix(0, -renderInfo.GetRise());
            segment = segment.TransformBy(riseOffsetTransform);
        }

        TextChunk tc = new TextChunk(renderInfo.GetText(), tclStrat.CreateLocation(renderInfo, segment));
        locationalResult.Add(tc);
    }

    /// <summary>
    /// Returns the text and start/end coordinates of every collected chunk, in sorted chunk order.
    /// </summary>
    /// <returns>
    /// A new list with one <see cref="TextLocation"/> per collected chunk; empty when nothing
    /// has been rendered yet.
    /// </returns>
    public IList<TextLocation> GetLocations()
    {
        // Sort a copy so that calling this method never reorders the collected chunks.
        var sortedChunks = new List<TextChunk>(locationalResult);
        sortedChunks.Sort();

        var textLocations = new List<TextLocation>(sortedChunks.Count);
        foreach (var chunk in sortedChunks)
        {
            textLocations.Add(new TextLocation
            {
                Text = chunk.Text,
                X = chunk.Location.StartLocation[0],
                Y = chunk.Location.StartLocation[1],
                X1 = chunk.Location.EndLocation[0],
                Y1 = chunk.Location.EndLocation[1]
            });
        }

        return textLocations;
    }
}
/// <summary>
/// Simple data holder pairing a piece of text with a start point (X, Y)
/// and an end point (X1, Y1).
/// </summary>
public class TextLocation
{
    /// <summary>The text this location belongs to.</summary>
    public string Text { get; set; }

    /// <summary>First coordinate of the start point.</summary>
    public float X { get; set; }

    /// <summary>Second coordinate of the start point.</summary>
    public float Y { get; set; }

    /// <summary>First coordinate of the end point.</summary>
    public float X1 { get; set; }

    /// <summary>Second coordinate of the end point.</summary>
    public float Y1 { get; set; }
}
这里是我的截图供大家参考
默认情况下,chunk.Location 所使用的 ITextChunkLocation 实现只包含文本块在基线上的起点和终点信息。因此,对于水平文本,起点和终点的 Y 坐标是相同的。
如果您还想获取高度等其他细节,您应该创建另一个 ITextChunkLocation 实现,让它同时保存原始文本渲染信息中的相应属性。然后为您的位置类实现一个 ITextChunkLocationStrategy
并在 LocationTextExtractionStrategyWithPosition
.