DataCleaner icon indicating copy to clipboard operation
DataCleaner copied to clipboard

NPE: write HDFS file

Open ClaudiaPHI opened this issue 8 years ago • 0 comments

Steps to reproduce:

  1. Open the sample job below
  2. Open the "Create CSV file" component
  3. Go to the path of the file ("hdfs:///datacleaner/results/test/DKCustomers.csv")
  4. Remove the three slashes that follow "hdfs:" from the path.

Result:

image

Expected result: No NPE.

Job:

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!--
  Sample DataCleaner job used to reproduce the NPE.
  Flow: read the columns of the "Customers" datastore, keep the rows whose
  country equals DK, and write those rows to a CSV file on HDFS.
-->
<job xmlns="http://eobjects.org/analyzerbeans/job/1.0">
    <job-metadata>
        <job-description>Created with DataCleaner Enterprise edition 5.0</job-description>
        <author>claudiap</author>
        <updated-date>2016-04-12+02:00</updated-date>
        <metadata-properties>
            <property name="CoordinatesX.datastores.customers.csv">40</property>
            <property name="CoordinatesY.datastores.customers.csv">40</property>
        </metadata-properties>
    </job-metadata>
    <!-- Source columns; every column is declared with type STRING. -->
    <source>
        <data-context ref="Customers"/>
        <columns>
            <column id="col_id" path="id" type="STRING"/>
            <column id="col_given_name" path="given_name" type="STRING"/>
            <column id="col_family_name" path="family_name" type="STRING"/>
            <column id="col_company" path="company" type="STRING"/>
            <column id="col_address_line" path="address_line" type="STRING"/>
            <column id="col_post_code" path="post_code" type="STRING"/>
            <column id="col_city" path="city" type="STRING"/>
            <column id="col_country" path="country" type="STRING"/>
            <column id="col_email" path="email" type="STRING"/>
            <column id="col_birthdate" path="birthdate" type="STRING"/>
            <column id="col_gender" path="gender" type="STRING"/>
            <column id="col_job_title" path="job_title" type="STRING"/>
            <column id="col_income_amount" path="income_amount" type="STRING"/>
            <column id="col_income_currency" path="income_currency" type="STRING"/>
        </columns>
    </source>
    <transformation>
        <!-- Compares col_country with "DK"; matching rows produce outcome_0. -->
        <filter>
            <descriptor ref="Equals"/>
            <metadata-properties>
                <property name="CoordinatesY">300</property>
                <property name="CoordinatesX">121</property>
            </metadata-properties>
            <properties>
                <property name="Compare values" value="[DK]"/>
            </properties>
            <input ref="col_country" name="Input column"/>
            <outcome id="outcome_0" category="EQUALS"/>
        </filter>
    </transformation>
    <analysis>
        <!-- CSV writer; only runs for rows that satisfy outcome_0 of the filter above. -->
        <analyzer requires="outcome_0">
            <descriptor ref="Create CSV file"/>
            <metadata-properties>
                <property name="CoordinatesX">419</property>
                <property name="CoordinatesY">249</property>
            </metadata-properties>
            <properties>
                <!-- HDFS target path referred to in reproduction step 3; a single "hdfs" scheme, no other scheme prefix. -->
                <property name="File" value="hdfs:///datacleaner/results/test/DKCustomers.csv"/>
                <!-- NOTE(review): the separator and quote values are stored as escaped character references;
                     presumably the job reader decodes them to "," and the double quote. Left unchanged; confirm. -->
                <property name="Separator char" value="&amp;#44;"/>
                <property name="Quote char" value="&amp;quot;"/>
                <property name="Escape char" value="\"/>
                <property name="Include header" value="true"/>
                <property name="Encoding" value="UTF-8"/>
                <property name="Fields" value="[id,given_name,family_name,company,address_line,post_code,city,country,email,birthdate,gender,job_title,income_amount,income_currency]"/>
                <property name="Overwrite file if exists" value="true"/>
            </properties>
            <!-- One input per source column, listed in the same order as the "Fields" property. -->
            <input ref="col_id" name="Columns"/>
            <input ref="col_given_name" name="Columns"/>
            <input ref="col_family_name" name="Columns"/>
            <input ref="col_company" name="Columns"/>
            <input ref="col_address_line" name="Columns"/>
            <input ref="col_post_code" name="Columns"/>
            <input ref="col_city" name="Columns"/>
            <input ref="col_country" name="Columns"/>
            <input ref="col_email" name="Columns"/>
            <input ref="col_birthdate" name="Columns"/>
            <input ref="col_gender" name="Columns"/>
            <input ref="col_job_title" name="Columns"/>
            <input ref="col_income_amount" name="Columns"/>
            <input ref="col_income_currency" name="Columns"/>
        </analyzer>
    </analysis>
</job>

ClaudiaPHI avatar Apr 12 '16 08:04 ClaudiaPHI