使用hadoop url读取
package com.sweetop.styhadoop;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;

import java.io.InputStream;
import java.net.URL;

/**
 * Copies the resource named by the first command-line argument to standard
 * output, opening it through {@link java.net.URL}.
 *
 * <p>Originally created by lastsweetop on 13-5-31 at 10:16 AM.
 */
public class URLCat {

    /** Size, in bytes, of the buffer handed to the copy routine. */
    private static final int BUFFER_SIZE = 4096;

    static {
        // Install Hadoop's stream-handler factory before any URL is opened.
        // NOTE: java.net.URL allows a factory to be installed at most once per JVM.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the URL of the resource to print
     * @throws Exception if the URL is malformed or the copy fails
     */
    public static void main(String[] args) throws Exception {
        InputStream source = null;
        try {
            URL location = new URL(args[0]);
            source = location.openStream();
            // 'false': the streams are not closed by the copy; cleanup happens below.
            IOUtils.copyBytes(source, System.out, BUFFER_SIZE, false);
        } finally {
            // 'source' is still null here when opening the stream failed.
            IOUtils.closeStream(source);
        }
    }
}
使用FileSystem API读取数据
package com.sweetop.styhadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.InputStream;
import java.net.URI;

/**
 * Copies the file named by the first command-line argument to standard
 * output, opening it through the {@link FileSystem} API.
 *
 * <p>Originally created by lastsweetop on 13-5-31 at 11:24 AM.
 */
public class FileSystemCat {

    /** Size, in bytes, of the buffer handed to the copy routine. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the URI of the file to print
     * @throws Exception if the file system cannot be obtained or the copy fails
     */
    public static void main(String[] args) throws Exception {
        String location = args[0];
        Configuration configuration = new Configuration();
        // The file system implementation is selected from the URI.
        FileSystem fileSystem = FileSystem.get(URI.create(location), configuration);

        InputStream source = null;
        try {
            Path target = new Path(location);
            source = fileSystem.open(target);
            // 'false': the streams are not closed by the copy; cleanup happens below.
            IOUtils.copyBytes(source, System.out, BUFFER_SIZE, false);
        } finally {
            // 'source' is still null here when open() failed.
            IOUtils.closeStream(source);
        }
    }
}
/**
 * Positioning operations offered by a seekable stream.
 *
 * <p>NOTE(review): this looks like an excerpt of Hadoop's {@code Seekable}
 * interface with decompiler-style parameter names; confirm the exact
 * semantics against the Hadoop API documentation.
 */
public interface Seekable {

    /**
     * Moves to the given position; per the accompanying text, any position
     * in the file may be targeted.
     *
     * @param l the position to move to
     * @throws java.io.IOException if the operation fails
     */
    void seek(long l) throws java.io.IOException;

    /**
     * Presumably reports the current position in the file (TODO confirm).
     *
     * @return a position value
     * @throws java.io.IOException if the operation fails
     */
    long getPos() throws java.io.IOException;

    /**
     * NOTE(review): semantics are not shown here; judging by the name, this
     * tries to continue reading from a different source of the data at the
     * given position. Confirm against the Hadoop documentation.
     *
     * @param l the position of interest
     * @return a success flag (meaning to be confirmed)
     * @throws java.io.IOException if the operation fails
     */
    boolean seekToNewSource(long l) throws java.io.IOException;
}
seek方法可跳到文件中的任意位置,我们这里跳到文件的初始位置再重新读一次
/**
 * Prints the file named by the first command-line argument to standard
 * output twice, rewinding the stream with {@code seek(0)} between passes.
 */
public class FileSystemDoubleCat {

    /** Size, in bytes, of the buffer handed to the copy routine. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the URI of the file to print
     * @throws Exception if the file system cannot be obtained or a copy fails
     */
    public static void main(String[] args) throws Exception {
        String location = args[0];
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(URI.create(location), configuration);

        FSDataInputStream source = null;
        try {
            source = fileSystem.open(new Path(location));

            // First pass over the file.
            IOUtils.copyBytes(source, System.out, BUFFER_SIZE, false);

            // Go back to the beginning and print the content a second time.
            source.seek(0);
            IOUtils.copyBytes(source, System.out, BUFFER_SIZE, false);
        } finally {
            // 'source' is still null here when open() failed.
            IOUtils.closeStream(source);
        }
    }
}
FSDataInputStream还实现了PositionedReadable接口,
public interface PositionedReadable { int read(long l, byte[] bytes, int i, int i1) throws java.io.IOException; void readFully(long l, byte[] bytes, int i, int i1) throws java.io.IOException; void readFully(long l, byte[] bytes) throws java.io.IOException; }
可以从文件的任意位置(第一个参数)开始,读取指定长度(第四个参数)的数据,存放到数组(第二个参数)中从指定偏移量(第三个参数)开始的位置。
public FSDataOutputStream create(Path f) throws IOException
它还有很多重载方法,可以指定是否强制覆盖已存在的文件、文件的副本数(replication factor)、写缓冲区的大小、文件的块大小、文件的权限等。
public interface Progressable { void progress(); }
和普通文件系统一样,也支持append(追加)操作,写日志时最常用。
public FSDataOutputStream append(Path f) throws IOException
package com.sweetop.styhadoop; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.Progressable; import java.io.BufferedInputStream; import java.io.FileInputStream; import java.io.InputStream; import java.io.OutputStream; import java.net.URI; /** * Created with IntelliJ IDEA. * User: lastsweetop * Date: 13-6-2 * Time: 下午4:54 * To change this template use File | Settings | File Templates. */ public class FileCopyWithProgress { public static void main(String[] args) throws Exception { String localSrc = args[0]; String dst = args[1]; InputStream in = new BufferedInputStream(new FileInputStream(localSrc)); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(dst), conf); OutputStream out = fs.create(new Path(dst), new Progressable() { @Override public void progress() { System.out.print("."); } }); IOUtils.copyBytes(in, out, 4096, true);
FileStatus 封装了HDFS文件和目录的元数据,包括文件的长度、块大小、副本数、修改时间、所有者、权限等信息,通过FileSystem的getFileStatus方法可以获得这些信息:
package com.sweetop.styhadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;

/**
 * Prints selected {@link FileStatus} fields (path, owner, block size,
 * permission and replication) for the path given as the first argument.
 *
 * <p>Originally created by lastsweetop on 13-6-2 at 8:58 PM.
 */
public class ShowFileStatus {

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the URI of the file or directory to inspect
     * @throws IOException if the file system or the status cannot be obtained
     */
    public static void main(String[] args) throws IOException {
        Path target = new Path(args[0]);
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(URI.create(args[0]), configuration);

        FileStatus info = fileSystem.getFileStatus(target);

        // One "name = value" line per reported attribute.
        System.out.println("path = " + info.getPath());
        System.out.println("owner = " + info.getOwner());
        System.out.println("block size = " + info.getBlockSize());
        System.out.println("permission = " + info.getPermission());
        System.out.println("replication = " + info.getReplication());
    }
}
Listing files
package com.sweetop.styhadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;

/**
 * Lists the entries found under every path given on the command line and
 * prints each resulting path on its own line.
 *
 * <p>Originally created by lastsweetop on 13-6-2 at 10:09 PM.
 */
public class ListStatus {

    /**
     * Entry point.
     *
     * @param args the paths to list; the file system is chosen from {@code args[0]}
     * @throws IOException if the file system cannot be obtained or listing fails
     */
    public static void main(String[] args) throws IOException {
        String firstUri = args[0];
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(URI.create(firstUri), configuration);

        // Turn every command-line argument into a Path, keeping the order.
        Path[] requested = new Path[args.length];
        int slot = 0;
        for (String argument : args) {
            requested[slot++] = new Path(argument);
        }

        FileStatus[] entries = fileSystem.listStatus(requested);
        for (Path listed : FileUtil.stat2Paths(entries)) {
            System.out.println(listed);
        }
    }
}
package com.sweetop.styhadoop;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

/**
 * A {@link PathFilter} that rejects every path whose string form matches a
 * regular expression and accepts all others.
 *
 * <p>Originally created by lastsweetop on 13-6-3 at 2:49 PM.
 */
public class RegexExludePathFilter implements PathFilter {

    /** Expression identifying the paths to exclude. */
    private final String regex;

    /**
     * @param regex regular expression; paths that match it in full are rejected
     */
    public RegexExludePathFilter(String regex) {
        this.regex = regex;
    }

    /**
     * @param path the candidate path
     * @return {@code false} when the path's string form matches the
     *         expression, {@code true} otherwise
     */
    @Override
    public boolean accept(Path path) {
        String candidate = path.toString();
        if (candidate.matches(regex)) {
            return false;
        }
        return true;
    }
}
File patterns
package com.sweetop.styhadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;

/**
 * Expands the path pattern given as the first argument and prints every
 * match, skipping paths rejected by a {@link RegexExludePathFilter} built
 * from the expression {@code ^.*}{@code /1901}.
 *
 * <p>Originally created by lastsweetop on 13-6-3 at 2:37 PM.
 */
public class GlobStatus {

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the path pattern to expand
     * @throws IOException if the file system cannot be obtained or the
     *                     pattern cannot be expanded
     */
    public static void main(String[] args) throws IOException {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);

        FileStatus[] status = fs.globStatus(new Path(uri), new RegexExludePathFilter("^.*/1901"));
        if (status == null) {
            // globStatus is documented to return null when the pattern has no
            // glob and the path does not exist; stat2Paths would pass the null
            // through and the loop below would fail. Treat it as "no matches".
            return;
        }

        Path[] listedPaths = FileUtil.stat2Paths(status);
        for (Path p : listedPaths) {
            System.out.println(p);
        }
    }
}
public abstract boolean delete(Path f, boolean recursive) throws IOException
作者:lastsweetop 发表于2013-6-3 21:28:09 原文链接
阅读:10 评论:0 查看评论