SQL中的多表查询

1、笛卡尔积：举例

2、根据连接条件的不同：

（*）等值连接

（*）不等值连接

（*）外连接

（*）自连接

二、多表查询：等值连接

查询员工信息：部门名称、员工姓名

select d.dname,e.ename

from emp e,dept d

where e.deptno=d.deptno;

三、多表查询：自连接：就是通过表的别名，将同一张表视为多张表

查询员工信息：老板姓名、员工姓名

条件：员工的老板号 = 老板的员工号

select b.ename,e.ename

from emp e,emp b

where e.mgr=b.empno;

===========================================================

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper for the self-join of the employee table (boss name / employee names).
 *
 * <p>Each input line is one comma-separated employee record, e.g.
 * {@code 7698,BLAKE,MANAGER,7839,1981/5/1,2850,,30}. The record is emitted twice:
 * <ul>
 *   <li>keyed by its own employee number (field 0) with the name prefixed by {@code "*"},
 *       so that it can act as the "boss" row;</li>
 *   <li>keyed by its manager number (field 3) with the plain name, so that it can act
 *       as the "employee" row.</li>
 * </ul>
 * Records whose manager field is missing or not a number are keyed by {@code -1}.
 */
public class SelfJoinMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

    /** Key used for a record that has no parsable manager number (the top-level boss). */
    private static final long NO_MANAGER_KEY = -1L;

    /** Index of the manager number in the comma-separated record. */
    private static final int MANAGER_FIELD = 3;

    /**
     * Emits the boss-side and employee-side pairs for one employee record.
     *
     * @param key1    input key supplied by the framework (not used here)
     * @param value1  one comma-separated employee record
     * @param context sink for the two output pairs
     * @throws IOException           if writing to the context fails
     * @throws InterruptedException  if writing to the context is interrupted
     * @throws NumberFormatException if the employee number (field 0) is not a number
     */
    @Override
    protected void map(LongWritable key1, Text value1, Context context)
            throws IOException, InterruptedException {
        // Sample record: 7698,BLAKE,MANAGER,7839,1981/5/1,2850,,30
        String[] words = value1.toString().split(",");

        long empNo = Long.parseLong(words[0]);
        String empName = words[1];

        // 1. Boss side: key = own employee number, value = "*" + name.
        context.write(new LongWritable(empNo), new Text("*" + empName));

        // 2. Employee side: key = manager number, value = name.
        // The manager number is resolved BEFORE writing so that a failure inside
        // context.write() propagates instead of being mistaken for "no manager".
        context.write(new LongWritable(parseManagerId(words)), new Text(empName));
    }

    /**
     * Returns the manager number of a record, or {@link #NO_MANAGER_KEY} when the
     * field is absent, empty, or not a valid number.
     */
    private static long parseManagerId(String[] fields) {
        if (fields.length <= MANAGER_FIELD || fields[MANAGER_FIELD].isEmpty()) {
            return NO_MANAGER_KEY;
        }
        try {
            return Long.parseLong(fields[MANAGER_FIELD]);
        } catch (NumberFormatException ex) {
            // Non-numeric manager field: treated like a missing manager.
            return NO_MANAGER_KEY;
        }
    }

}

-----------------------------------------------------------------------------------------------------------

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer for the self-join of the employee table.
 *
 * <p>For one key it receives a mix of values: a value starting with {@code "*"} carries
 * the boss name, every other value is an employee name. It writes
 * {@code bossName -> (emp1;emp2;...)} only when both a boss name and at least one
 * employee name were seen.
 */
public class SelfJoinReducer extends Reducer<LongWritable, Text, Text, Text> {

    /** Prefix that marks a value as the boss-side record. */
    private static final String BOSS_MARKER = "*";

    /**
     * Joins the boss name with the names of the employees that share the key.
     *
     * @param k3      the join key
     * @param v3      boss-marked and plain employee-name values for this key
     * @param context sink for the joined output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void reduce(LongWritable k3, Iterable<Text> v3, Context context)
            throws IOException, InterruptedException {
        String bossName = "";
        StringBuilder empNameList = new StringBuilder();

        for (Text v : v3) {
            String str = v.toString();

            // Only a LEADING marker identifies the boss record; a "*" elsewhere in
            // the value is part of an ordinary name.
            if (str.startsWith(BOSS_MARKER)) {
                bossName = str.substring(BOSS_MARKER.length());
            } else {
                // Prepend, so the most recently seen name comes first.
                empNameList.insert(0, str + ";");
            }
        }

        // Emit only when both sides of the join are present.
        if (bossName.length() > 0 && empNameList.length() > 0) {
            context.write(new Text(bossName), new Text("(" + empNameList + ")"));
        }
    }
}

---------------------------------------------------------------------------------------

package demp.selfjoin;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver that configures and submits the self-join job
 * ({@code SelfJoinMapper} + {@code SelfJoinReducer}).
 */
public class SelfJoinMain {

    /**
     * Configures the job and waits for it to finish.
     *
     * <p>The process exits with status 0 when the job succeeds, 1 when it fails,
     * and 2 when the command-line arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be created, configured or run
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: SelfJoinMain <input path> <output path>");
            System.exit(2);
        }

        // A job = mapper + reducer.
        Job job = Job.getInstance(new Configuration());
        // Entry point used to locate the job jar.
        job.setJarByClass(SelfJoinMain.class);

        // Mapper and its output types (k2, v2).
        job.setMapperClass(SelfJoinMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Reducer and its output types (k4, v4).
        job.setReducerClass(SelfJoinReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Input and output locations.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Run the job and report its outcome through the exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}

===============================================================

SQL中的多表查询：等值连接的MapReduce实现

package demp.mutiltable;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper for the equi-join of the department and employee tables.
 *
 * <p>Both tables arrive as comma-separated lines and are told apart by field count:
 * a line with exactly 3 fields is handled as a department record, a line with at
 * least 8 fields as an employee record. Both are keyed by department number so that
 * they meet in the same reduce call; the department name is prefixed with
 * {@code "*"} to distinguish it from employee names.
 */
public class MutilTableQueryMapper extends Mapper<LongWritable, Text, LongWritable,Text> {

    /** Number of fields in a department record. */
    private static final int DEPT_FIELD_COUNT = 3;

    /** Index of the department number in an employee record. */
    private static final int EMP_DEPTNO_FIELD = 7;

    /**
     * Emits one {@code (deptno, value)} pair for a department or employee record.
     * Lines that match neither layout are skipped and counted in the
     * {@code MutilTableQuery/MALFORMED_RECORDS} counter.
     *
     * @param key1    input key supplied by the framework (not used here)
     * @param value1  one comma-separated department or employee record
     * @param context sink for the output pair
     * @throws IOException           if writing to the context fails
     * @throws InterruptedException  if writing to the context is interrupted
     * @throws NumberFormatException if the department number field is not a number
     */
    @Override
    protected void map(LongWritable key1, Text value1, Context context)
            throws IOException, InterruptedException {
        String[] words = value1.toString().split(",");

        if (words.length == DEPT_FIELD_COUNT) {
            // Department record: key = department number, value = "*" + department name.
            context.write(new LongWritable(Long.parseLong(words[0])), new Text("*" + words[1]));
        } else if (words.length > EMP_DEPTNO_FIELD) {
            // Employee record: key = employee's department number, value = employee name.
            context.write(new LongWritable(Long.parseLong(words[EMP_DEPTNO_FIELD])),
                    new Text(words[1]));
        } else {
            // Blank or truncated line: indexing field 7 would throw, so skip it
            // and make the skip visible through a job counter.
            context.getCounter("MutilTableQuery", "MALFORMED_RECORDS").increment(1);
        }
    }

}

-------------------------------------------------------------------------------------------------------

package demp.mutiltable;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer for the equi-join of the department and employee tables.
 *
 * <p>For one key it receives a mix of values: a value starting with {@code "*"} carries
 * the department name, every other value is an employee name. It writes
 * {@code departmentName -> emp1;emp2;...}.
 */
public class MutilTableQueryReducer extends Reducer<LongWritable,Text, Text, Text> {

    /** Prefix that marks a value as the department-side record. */
    private static final String DEPT_MARKER = "*";

    /**
     * Joins the department name with the names of the employees that share the key.
     *
     * @param k3      the join key
     * @param v3      department-marked and plain employee-name values for this key
     * @param context sink for the joined output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void reduce(LongWritable k3, Iterable<Text> v3, Context context)
            throws IOException, InterruptedException {
        String dname = "";
        StringBuilder empNameList = new StringBuilder();

        for (Text t : v3) {
            String str = t.toString();

            // Only a LEADING marker identifies the department record; a "*" elsewhere
            // in the value is part of an ordinary name.
            if (str.startsWith(DEPT_MARKER)) {
                dname = str.substring(DEPT_MARKER.length());
            } else {
                // Prepend, so the most recently seen name comes first.
                empNameList.insert(0, str + ";");
            }
        }

        // NOTE(review): the pair is written even when the department name or the
        // employee list is empty, which differs from a SQL inner join - confirm
        // whether unmatched keys should be filtered out.
        context.write(new Text(dname), new Text(empNameList.toString()));
    }
}

---------------------------------------------------------------------------------------------

package demp.mutiltable;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver that configures and submits the department/employee equi-join job
 * ({@code MutilTableQueryMapper} + {@code MutilTableQueryReducer}).
 */
public class MutilTableQueryMain {

    /**
     * Configures the job and waits for it to finish.
     *
     * <p>The process exits with status 0 when the job succeeds, 1 when it fails,
     * and 2 when the command-line arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be created, configured or run
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: MutilTableQueryMain <input path> <output path>");
            System.exit(2);
        }

        // A job = mapper + reducer.
        Job job = Job.getInstance(new Configuration());
        // Entry point used to locate the job jar.
        job.setJarByClass(MutilTableQueryMain.class);

        // Mapper and its output types (k2, v2).
        job.setMapperClass(MutilTableQueryMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Reducer and its output types (k4, v4).
        job.setReducerClass(MutilTableQueryReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Input and output locations.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Run the job and report its outcome through the exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}