Skip to content

Commit f3a8954

Browse files
committed
Merge pull request #12 from civitaspo/delete_in_advance
Delete in advance
2 parents e0dfb95 + 2f70273 commit f3a8954

File tree

11 files changed

+676
-13
lines changed

11 files changed

+676
-13
lines changed

.travis.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
language: java
2+
jdk:
3+
- openjdk7
4+
- oraclejdk7
5+
- oraclejdk8
6+
script:
7+
- ./gradlew test
8+
after_success:
9+
- ./gradlew jacocoTestReport coveralls

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Hdfs file output plugin for Embulk
22

3+
[![Build Status](https://travis-ci.org/civitaspo/embulk-output-hdfs.svg)](https://travis-ci.org/civitaspo/embulk-output-hdfs)
4+
[![Coverage Status](https://coveralls.io/repos/github/civitaspo/embulk-output-hdfs/badge.svg?branch=master)](https://coveralls.io/github/civitaspo/embulk-output-hdfs?branch=master)
5+
36
A File Output Plugin for Embulk to write HDFS.
47

58
## Overview
@@ -20,6 +23,16 @@ A File Output Plugin for Embulk to write HDFS.
2023
- **overwrite** overwrite files when files with the same names already exist (boolean, default: `false`)
2124
- *caution*: even if this property is `true`, idempotence is not guaranteed. If you need idempotence, you must remove the output files yourself before or after each run.
2225
- **doas** username used to access HDFS (string, default: the executing user)
26+
- **delete_in_advance** delete files and directories that match `path_prefix` in advance (enum, default: `NONE`)
27+
- `NONE`: do nothing
28+
- `FILE_ONLY`: delete files
29+
- `RECURSIVE`: delete files and directories
30+
31+
## CAUTION
32+
If you use the `hadoop` user (the HDFS admin user) as `doas` and set `delete_in_advance` to `RECURSIVE`,
33+
`embulk-output-hdfs` can delete any files and directories that match the `path_prefix` you specify.
34+
This means `embulk-output-hdfs` can destroy your HDFS.
35+
So please be careful when you use the `delete_in_advance` and `doas` options.
2336

2437
## Example
2538

build.gradle

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ plugins {
22
id "com.jfrog.bintray" version "1.1"
33
id "com.github.jruby-gradle.base" version "0.1.5"
44
id "java"
5+
id "checkstyle"
6+
id "com.github.kt3k.coveralls" version "2.4.0"
7+
id "jacoco"
58
}
69
import com.github.jrubygradle.JRubyExec
710
repositories {
@@ -18,12 +21,14 @@ sourceCompatibility = 1.7
1821
targetCompatibility = 1.7
1922

2023
dependencies {
21-
compile "org.embulk:embulk-core:0.7.0"
22-
provided "org.embulk:embulk-core:0.7.0"
24+
compile "org.embulk:embulk-core:0.8.8"
25+
provided "org.embulk:embulk-core:0.8.8"
2326
// compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION"
2427
compile 'org.apache.hadoop:hadoop-client:2.6.0'
2528
compile 'com.google.guava:guava:15.0'
2629
testCompile "junit:junit:4.+"
30+
testCompile "org.embulk:embulk-core:0.8.8:tests"
31+
testCompile "org.embulk:embulk-standards:0.8.8"
2732
}
2833

2934
task classpath(type: Copy, dependsOn: ["jar"]) {
@@ -33,6 +38,29 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
3338
}
3439
clean { delete "classpath" }
3540

41+
jacocoTestReport {
42+
reports {
43+
xml.enabled = true // coveralls plugin depends on xml format report
44+
html.enabled = true
45+
}
46+
}
47+
checkstyle {
48+
configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
49+
toolVersion = '6.14.1'
50+
}
51+
checkstyleMain {
52+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
53+
ignoreFailures = true
54+
}
55+
checkstyleTest {
56+
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
57+
ignoreFailures = true
58+
}
59+
task checkstyle(type: Checkstyle) {
60+
classpath = sourceSets.main.output + sourceSets.test.output
61+
source = sourceSets.main.allJava + sourceSets.test.allJava
62+
}
63+
3664
task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
3765
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
3866
script "${project.name}.gemspec"

config/checkstyle/checkstyle.xml

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!DOCTYPE module PUBLIC
3+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5+
<module name="Checker">
6+
<!-- https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml -->
7+
<module name="FileTabCharacter"/>
8+
<module name="NewlineAtEndOfFile">
9+
<property name="lineSeparator" value="lf"/>
10+
</module>
11+
<module name="RegexpMultiline">
12+
<property name="format" value="\r"/>
13+
<property name="message" value="Line contains carriage return"/>
14+
</module>
15+
<module name="RegexpMultiline">
16+
<property name="format" value=" \n"/>
17+
<property name="message" value="Line has trailing whitespace"/>
18+
</module>
19+
<module name="RegexpMultiline">
20+
<property name="format" value="\{\n\n"/>
21+
<property name="message" value="Blank line after opening brace"/>
22+
</module>
23+
<module name="RegexpMultiline">
24+
<property name="format" value="\n\n\s*\}"/>
25+
<property name="message" value="Blank line before closing brace"/>
26+
</module>
27+
<module name="RegexpMultiline">
28+
<property name="format" value="\n\n\n"/>
29+
<property name="message" value="Multiple consecutive blank lines"/>
30+
</module>
31+
<module name="RegexpMultiline">
32+
<property name="format" value="\n\n\Z"/>
33+
<property name="message" value="Blank line before end of file"/>
34+
</module>
35+
<module name="RegexpMultiline">
36+
<property name="format" value="Preconditions\.checkNotNull"/>
37+
<property name="message" value="Use of checkNotNull"/>
38+
</module>
39+
40+
<module name="TreeWalker">
41+
<module name="EmptyBlock">
42+
<property name="option" value="text"/>
43+
<property name="tokens" value="
44+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
45+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
46+
</module>
47+
<module name="EmptyStatement"/>
48+
<module name="EmptyForInitializerPad"/>
49+
<module name="EmptyForIteratorPad">
50+
<property name="option" value="space"/>
51+
</module>
52+
<module name="MethodParamPad">
53+
<property name="allowLineBreaks" value="true"/>
54+
<property name="option" value="nospace"/>
55+
</module>
56+
<module name="ParenPad"/>
57+
<module name="TypecastParenPad"/>
58+
<module name="NeedBraces"/>
59+
<module name="LeftCurly">
60+
<property name="option" value="nl"/>
61+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
62+
</module>
63+
<module name="LeftCurly">
64+
<property name="option" value="eol"/>
65+
<property name="tokens" value="
66+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
67+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
68+
</module>
69+
<module name="RightCurly">
70+
<property name="option" value="alone"/>
71+
</module>
72+
<module name="GenericWhitespace"/>
73+
<module name="WhitespaceAfter"/>
74+
<module name="NoWhitespaceBefore"/>
75+
76+
<module name="UpperEll"/>
77+
<module name="DefaultComesLast"/>
78+
<module name="ArrayTypeStyle"/>
79+
<module name="MultipleVariableDeclarations"/>
80+
<module name="ModifierOrder"/>
81+
<module name="OneStatementPerLine"/>
82+
<module name="StringLiteralEquality"/>
83+
<module name="MutableException"/>
84+
<module name="EqualsHashCode"/>
85+
<module name="InnerAssignment"/>
86+
<module name="InterfaceIsType"/>
87+
<module name="HideUtilityClassConstructor"/>
88+
89+
<module name="MemberName"/>
90+
<module name="LocalVariableName"/>
91+
<module name="LocalFinalVariableName"/>
92+
<module name="TypeName"/>
93+
<module name="PackageName"/>
94+
<module name="ParameterName"/>
95+
<module name="StaticVariableName"/>
96+
<module name="ClassTypeParameterName">
97+
<property name="format" value="^[A-Z][0-9]?$"/>
98+
</module>
99+
<module name="MethodTypeParameterName">
100+
<property name="format" value="^[A-Z][0-9]?$"/>
101+
</module>
102+
103+
<module name="AvoidStarImport"/>
104+
<module name="RedundantImport"/>
105+
<module name="UnusedImports"/>
106+
<module name="ImportOrder">
107+
<property name="groups" value="*,javax,java"/>
108+
<property name="separated" value="true"/>
109+
<property name="option" value="bottom"/>
110+
<property name="sortStaticImportsAlphabetically" value="true"/>
111+
</module>
112+
113+
<module name="WhitespaceAround">
114+
<property name="allowEmptyConstructors" value="true"/>
115+
<property name="allowEmptyMethods" value="true"/>
116+
<property name="ignoreEnhancedForColon" value="false"/>
117+
<property name="tokens" value="
118+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
119+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
120+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
121+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
122+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
123+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
124+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
125+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
126+
</module>
127+
</module>
128+
</module>

config/checkstyle/default.xml

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!DOCTYPE module PUBLIC
3+
"-//Puppy Crawl//DTD Check Configuration 1.3//EN"
4+
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
5+
<!--
6+
This is a subset of ./checkstyle.xml which allows some loose styles
7+
-->
8+
<module name="Checker">
9+
<module name="FileTabCharacter"/>
10+
<module name="NewlineAtEndOfFile">
11+
<property name="lineSeparator" value="lf"/>
12+
</module>
13+
<module name="RegexpMultiline">
14+
<property name="format" value="\r"/>
15+
<property name="message" value="Line contains carriage return"/>
16+
</module>
17+
<module name="RegexpMultiline">
18+
<property name="format" value=" \n"/>
19+
<property name="message" value="Line has trailing whitespace"/>
20+
</module>
21+
<module name="RegexpMultiline">
22+
<property name="format" value="\n\n\n"/>
23+
<property name="message" value="Multiple consecutive blank lines"/>
24+
</module>
25+
<module name="RegexpMultiline">
26+
<property name="format" value="\n\n\Z"/>
27+
<property name="message" value="Blank line before end of file"/>
28+
</module>
29+
30+
<module name="TreeWalker">
31+
<module name="EmptyBlock">
32+
<property name="option" value="text"/>
33+
<property name="tokens" value="
34+
LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF,
35+
LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, INSTANCE_INIT, STATIC_INIT"/>
36+
</module>
37+
<module name="EmptyStatement"/>
38+
<module name="EmptyForInitializerPad"/>
39+
<module name="EmptyForIteratorPad">
40+
<property name="option" value="space"/>
41+
</module>
42+
<module name="MethodParamPad">
43+
<property name="allowLineBreaks" value="true"/>
44+
<property name="option" value="nospace"/>
45+
</module>
46+
<module name="ParenPad"/>
47+
<module name="TypecastParenPad"/>
48+
<module name="NeedBraces"/>
49+
<module name="LeftCurly">
50+
<property name="option" value="nl"/>
51+
<property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, METHOD_DEF"/>
52+
</module>
53+
<module name="LeftCurly">
54+
<property name="option" value="eol"/>
55+
<property name="tokens" value="
56+
LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR,
57+
LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE"/>
58+
</module>
59+
<module name="RightCurly">
60+
<property name="option" value="alone"/>
61+
</module>
62+
<module name="GenericWhitespace"/>
63+
<module name="WhitespaceAfter"/>
64+
<module name="NoWhitespaceBefore"/>
65+
66+
<module name="UpperEll"/>
67+
<module name="DefaultComesLast"/>
68+
<module name="ArrayTypeStyle"/>
69+
<module name="MultipleVariableDeclarations"/>
70+
<module name="ModifierOrder"/>
71+
<module name="OneStatementPerLine"/>
72+
<module name="StringLiteralEquality"/>
73+
<module name="MutableException"/>
74+
<module name="EqualsHashCode"/>
75+
<module name="InnerAssignment"/>
76+
<module name="InterfaceIsType"/>
77+
<module name="HideUtilityClassConstructor"/>
78+
79+
<module name="MemberName"/>
80+
<module name="LocalVariableName"/>
81+
<module name="LocalFinalVariableName"/>
82+
<module name="TypeName"/>
83+
<module name="PackageName"/>
84+
<module name="ParameterName"/>
85+
<module name="StaticVariableName"/>
86+
<module name="ClassTypeParameterName">
87+
<property name="format" value="^[A-Z][0-9]?$"/>
88+
</module>
89+
<module name="MethodTypeParameterName">
90+
<property name="format" value="^[A-Z][0-9]?$"/>
91+
</module>
92+
93+
<module name="WhitespaceAround">
94+
<property name="allowEmptyConstructors" value="true"/>
95+
<property name="allowEmptyMethods" value="true"/>
96+
<property name="ignoreEnhancedForColon" value="false"/>
97+
<property name="tokens" value="
98+
ASSIGN, BAND, BAND_ASSIGN, BOR, BOR_ASSIGN, BSR, BSR_ASSIGN,
99+
BXOR, BXOR_ASSIGN, COLON, DIV, DIV_ASSIGN, EQUAL, GE, GT, LAND, LE,
100+
LITERAL_ASSERT, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE,
101+
LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_RETURN,
102+
LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE,
103+
LOR, LT, MINUS, MINUS_ASSIGN, MOD, MOD_ASSIGN, NOT_EQUAL,
104+
PLUS, PLUS_ASSIGN, QUESTION, SL, SLIST, SL_ASSIGN, SR, SR_ASSIGN,
105+
STAR, STAR_ASSIGN, TYPE_EXTENSION_AND"/>
106+
</module>
107+
</module>
108+
</module>

example/config.yml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
hdfs_example: &hdfs_example
2+
config_files:
3+
- /etc/hadoop/conf/core-site.xml
4+
- /etc/hadoop/conf/hdfs-site.xml
5+
config:
6+
fs.defaultFS: 'hdfs://hadoop-nn1:8020'
7+
fs.hdfs.impl: 'org.apache.hadoop.hdfs.DistributedFileSystem'
8+
fs.file.impl: 'org.apache.hadoop.fs.LocalFileSystem'
9+
10+
local_fs_example: &local_fs_example
11+
config:
12+
fs.defaultFS: 'file:///'
13+
fs.hdfs.impl: 'org.apache.hadoop.fs.RawLocalFileSystem'
14+
fs.file.impl: 'org.apache.hadoop.fs.RawLocalFileSystem'
15+
io.compression.codecs: 'org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec'
16+
17+
in:
18+
type: file
19+
path_prefix: example/data
20+
parser:
21+
charset: UTF-8
22+
newline: CRLF
23+
type: csv
24+
delimiter: ','
25+
quote: '"'
26+
header_line: true
27+
stop_on_invalid_record: true
28+
columns:
29+
- {name: id, type: long}
30+
- {name: account, type: long}
31+
- {name: time, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
32+
- {name: purchase, type: timestamp, format: '%Y%m%d'}
33+
- {name: comment, type: string}
34+
35+
36+
out:
37+
type: hdfs
38+
<<: *local_fs_example
39+
path_prefix: /tmp/embulk-output-hdfs_example/file_
40+
file_ext: csv
41+
delete_in_advance: RECURSIVE
42+
formatter:
43+
type: csv
44+
newline: CRLF
45+
newline_in_field: LF
46+
header_line: true
47+
charset: UTF-8
48+
quote_policy: NONE
49+
quote: '"'
50+
escape: '\'
51+
null_string: ''
52+
default_timezone: UTC

example/data.csv

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
id,account,time,purchase,comment
2+
1,32864,2015-01-27 19:23:49,20150127,embulk
3+
2,14824,2015-01-27 19:01:23,20150127,embulk jruby
4+
3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin"
5+
4,11270,2015-01-29 11:54:36,20150129,NULL

gradle/wrapper/gradle-wrapper.jar

2.56 KB
Binary file not shown.
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
#Tue Aug 11 00:26:20 PDT 2015
1+
#Wed Jan 13 12:41:02 JST 2016
22
distributionBase=GRADLE_USER_HOME
33
distributionPath=wrapper/dists
44
zipStoreBase=GRADLE_USER_HOME
55
zipStorePath=wrapper/dists
6-
distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip
6+
distributionUrl=https\://services.gradle.org/distributions/gradle-2.10-bin.zip

0 commit comments

Comments
 (0)