pdfalto icon indicating copy to clipboard operation
pdfalto copied to clipboard

Reading order: another issue (this is more problematic)

Open Aazhar opened this issue 5 years ago • 1 comment

Image Pasted at 2019-3-26 11-34

And the extracted text reads: "appears to be maintained until approximately 35 40 years -of age, followed by modest decreases until 50 years of age, ..." — you can see that the 'hyphen' (the dash that belongs between "35" and "40") is out of place.

This also happens with pdf2xml, by the way.

Here is the output from pdfalto:

                <TextLine WIDTH="502.269" HEIGHT="8.208" ID="p1_t62" HPOS="51" VPOS="638.632">
                    <String ID="p1_w620" CONTENT="Endurance" HPOS="51" VPOS="638.632" WIDTH="38.556" HEIGHT="8.208"
                            STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="89.556"/>
                    <String ID="p1_w621" CONTENT="and" HPOS="92.04" VPOS="638.632" WIDTH="13.014" HEIGHT="8.208"
                            STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="105.054"/>
                    <String ID="p1_w622" CONTENT="ultra-endurance" HPOS="107.538" VPOS="638.632" WIDTH="56.601"
                            HEIGHT="8.208" STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="164.139"/>
                    <String ID="p1_w623" CONTENT="performance," HPOS="166.623" VPOS="638.632" WIDTH="47.826"
                            HEIGHT="8.208" STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="214.449"/>
                    <String ID="p1_w624" CONTENT="in" HPOS="216.933" VPOS="638.632" WIDTH="7.011" HEIGHT="8.208"
                            STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="223.944"/>
                    <String ID="p1_w625" CONTENT="terms" HPOS="226.428" VPOS="638.632" WIDTH="20.034" HEIGHT="8.208"
                            STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="246.462"/>
                    <String ID="p1_w626" CONTENT="of" HPOS="248.946" VPOS="638.632" WIDTH="7.506" HEIGHT="8.208"
                            STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="256.452"/>
                    <String ID="p1_w627" CONTENT="the" HPOS="258.936" VPOS="638.632" WIDTH="11.016" HEIGHT="8.208"
                            STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="269.952"/>
                    <String ID="p1_w628" CONTENT="overall" HPOS="272.436" VPOS="638.632" WIDTH="25.047"
                            HEIGHT="8.208" STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="297.483"/>
                    <String ID="p1_w629" CONTENT="time" HPOS="299.967" VPOS="638.632" WIDTH="16.029" HEIGHT="8.208"
                            STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="315.996"/>
                    <String ID="p1_w630" CONTENT="taken," HPOS="318.48" VPOS="638.632" WIDTH="21.789" HEIGHT="8.208"
                            STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="340.269"/>
                    <String ID="p1_w631" CONTENT="appears" HPOS="342.753" VPOS="638.632" WIDTH="27.54"
                            HEIGHT="8.208" STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="370.293"/>
                    <String ID="p1_w632" CONTENT="to" HPOS="372.777" VPOS="638.632" WIDTH="7.011" HEIGHT="8.208"
                            STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="379.788"/>
                    <String ID="p1_w633" CONTENT="be" HPOS="382.272" VPOS="638.632" WIDTH="8.505" HEIGHT="8.208"
                            STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="390.777"/>
                    <String ID="p1_w634" CONTENT="maintained" HPOS="393.261" VPOS="638.632" WIDTH="40.077"
                            HEIGHT="8.208" STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="433.338"/>
                    <String ID="p1_w635" CONTENT="until" HPOS="435.822" VPOS="638.632" WIDTH="16.542" HEIGHT="8.208"
                            STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="452.364"/>
                    <String ID="p1_w636" CONTENT="approximately" HPOS="454.848" VPOS="638.632" WIDTH="52.101"
                            HEIGHT="8.208" STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="506.949"/>
                    <String ID="p1_w637" CONTENT="35" HPOS="509.433" VPOS="638.632" WIDTH="9.009" HEIGHT="8.208"
                            STYLEREFS="font10"/>
                    <SP WIDTH="4.308" VPOS="638.632" HPOS="518.442"/>
                    <String ID="p1_w638" CONTENT="40" HPOS="522.75" VPOS="638.632" WIDTH="9.009" HEIGHT="8.208"
                            STYLEREFS="font10"/>
                    <SP WIDTH="2.484" VPOS="638.632" HPOS="531.759"/>
                    <String ID="p1_w639" CONTENT="years" HPOS="534.243" VPOS="638.632" WIDTH="19.026" HEIGHT="8.208"
                            STYLEREFS="font10"/>
                </TextLine>
            </TextBlock>
            <TextBlock ID="p1_b49" HPOS="518.7" VPOS="637" HEIGHT="9.75579" WIDTH="3.995">
                <TextLine WIDTH="3.995" HEIGHT="9.75579" ID="p1_t63" HPOS="518.7" VPOS="637">
                    <String ID="p1_w640" CONTENT="–" HPOS="518.7" VPOS="637" WIDTH="3.995" HEIGHT="9.75579"
                            STYLEREFS="font5"/>
                </TextLine>
            </TextBlock>

Aazhar avatar Mar 26 '19 09:03 Aazhar

Same as #50: this is another particular case where a small portion of text is out of the reading order.

Aazhar avatar Mar 26 '19 16:03 Aazhar